🕸️

[python] 크롤링 - 가온차트 2

# -*- encoding: utf-8 -*-
"""Scrape the weekly Gaon online chart and save one CSV file per week.

For every year in ``find_year`` and every week number in ``WEEK_NUMBERS`` the
chart page is downloaded, the song / singer / count cells are extracted, and
the rows are written to ``gaon<year><week>.csv`` in the working directory.
"""
import csv
from urllib.request import urlopen

import pandas as pd

# Query URL of the weekly chart; ``week`` and ``year`` are filled in per request.
CHART_URL = (
    "http://gaonchart.co.kr/main/section/chart/online.gaon"
    "?nationGbn=T&serviceGbn=ALL&targetTime={week}&hitYear={year}&termGbn=week"
)
CHART_COLUMNS = ['song', 'singer', 'count']

# cp949 is a superset of euc-kr: text that euc-kr can encode produces the same
# bytes, while Hangul syllables outside KS X 1001 no longer raise
# UnicodeEncodeError when the CSV is written.
CSV_ENCODING = 'cp949'

find_year = ['2019', '2020', '2021']
find_week = '12'  # 1 ~ 52

# The original loop requested weeks 1..54; the range is kept so that no week is
# missed, and weeks for which the page has no rows are skipped in main().
WEEK_NUMBERS = range(1, 55)


def chart_url(year: str, week: str) -> str:
    """Return the chart page URL for the given year and week number."""
    return CHART_URL.format(week=week, year=year)


def output_filename(year: str, week: str) -> str:
    """Return the CSV file name used for the given year and week."""
    return 'gaon' + year + week + '.csv'


def parse_chart(soup) -> list:
    """Extract ``(song, singer, count)`` tuples from a parsed chart page.

    ``soup`` is the BeautifulSoup document of one chart page. Song and singer
    are read from the ``td.subject`` cells (first ``<p>`` and ``p.singer``),
    the count from the first ``<p>`` of the ``td.count`` cells. The two cell
    lists are paired positionally; surplus cells on either side are dropped.
    """
    songs = []
    singers = []
    # One pass over the subject cells yields both the title and the artist.
    for cell in soup.find_all(name="td", attrs={"class": "subject"}):
        songs.append(cell.find('p').text)
        singers.append(cell.find(name="p", attrs={"class": "singer"}).text)
    counts = [
        cell.find('p').text
        for cell in soup.find_all(name="td", attrs={"class": "count"})
    ]
    return list(zip(songs, singers, counts))


def fetch_chart_rows(year: str, week: str) -> list:
    """Download one chart page and return its ``(song, singer, count)`` rows."""
    # Imported here so that the pure helpers in this module stay importable
    # (and testable) on machines where bs4 is not installed.
    from bs4 import BeautifulSoup

    # The context manager closes the HTTP response even if parsing fails.
    with urlopen(chart_url(year, week)) as response:
        print(response)
        soup = BeautifulSoup(response, "lxml", from_encoding='utf-8')
    return parse_chart(soup)


def build_chart_frame(rows, year: str, week: str) -> pd.DataFrame:
    """Return a DataFrame with columns song, singer, count, year, week.

    The column names are given at construction time, so an empty ``rows``
    list yields an empty frame with the right columns instead of failing on
    a column-count mismatch.
    """
    frame = pd.DataFrame(list(rows), columns=CHART_COLUMNS)
    return frame.assign(year=year, week=week)


def main() -> None:
    """Scrape every configured year/week and write one CSV file per week."""
    for year in find_year:
        for number in WEEK_NUMBERS:
            week = str(number)
            rows = fetch_chart_rows(year, week)
            if not rows:
                # Nothing to save, e.g. a week number the year does not have.
                print("no chart rows for {} week {}; skipped".format(year, week))
                continue
            data = build_chart_frame(rows, year, week)
            data.to_csv(output_filename(year, week), encoding=CSV_ENCODING)


if __name__ == "__main__":
    main()
Python
복사