# -*- encoding: utf-8 -*-
from urllib.request import urlopen
from bs4 import BeautifulSoup
#import csv
import pandas as pd
# Scrape the bestseller list page, visit every linked book page, and save the
# collected title / author / image / URL / price rows to ``result2.csv``.

LIST_URL = "https://www.kyobobook.co.kr/bestSellerNew/bestseller.laf"
CSV_COLUMNS = ['INDEX', 'TITLE', 'AUTHOR', 'IMAGE', 'URL', 'PRICE']


def _fetch_soup(url, encoding):
    """Download *url* and return it parsed with lxml.

    The HTTP response is closed as soon as parsing finishes; *encoding* is
    forwarded to BeautifulSoup as ``from_encoding``.
    """
    with urlopen(url) as response:
        return BeautifulSoup(response, "lxml", from_encoding=encoding)


def _meta_content(soup, prop):
    """Return the ``content`` of ``<meta property=prop>``, or None if absent."""
    tag = soup.find('meta', {'property': prop})
    return tag.get('content') if tag is not None else None


bsObject = _fetch_soup(LIST_URL, 'utf-8')

# The first anchor inside each ``div.detail`` is taken as the book page link.
# Entries without an anchor or without an href are skipped instead of
# raising IndexError or passing None to urlopen.
book_page_urls = []
for cover in bsObject.find_all('div', {'class': 'detail'}):
    anchor = cover.select_one('a')
    link = anchor.get('href') if anchor is not None else None
    if link:
        book_page_urls.append(link)

book_info = []
# A distinct loop variable keeps ``book_page_urls`` bound to the list
# (the previous code rebound it to the URL string being visited).
for rank, page_url in enumerate(book_page_urls, start=1):
    page = _fetch_soup(page_url, 'euc-kr')
    title = _meta_content(page, 'og:title')
    author_tag = page.select_one('span.name a')
    # A missing field becomes None (an empty CSV cell) so that one
    # incomplete page does not abort the whole run.
    author = author_tag.text if author_tag is not None else None
    image = _meta_content(page, 'og:image')
    url = _meta_content(page, 'og:url')
    price = _meta_content(page, 'og:price')
    print(rank, title, author, image, url, price)
    book_info.append([rank, title, author, image, url, price])

# Passing ``columns=`` to the constructor also works when nothing was scraped;
# assigning ``data.columns`` afterwards raises ValueError on an empty frame.
data = pd.DataFrame(book_info, columns=CSV_COLUMNS)
data.to_csv('result2.csv', encoding='euc-kr')
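# --- Residue from the web page this script was copied from (not code) ---
# Python
# Copy
# View other posts in the same category
# Table
# Search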