🕸️

[python] 크롤링 - 교보문고

# -*- encoding: utf-8 -*-
# Scrape the Kyobo bestseller listing, visit every linked book page, print
# one line per book and save the collected rows to ``result2.csv``.
from urllib.request import urlopen

from bs4 import BeautifulSoup
import pandas as pd

BESTSELLER_URL = "https://www.kyobobook.co.kr/bestSellerNew/bestseller.laf"
COLUMNS = ['INDEX', 'TITLE', 'AUTHOR', 'IMAGE', 'URL', 'PRICE']


def _fetch_soup(url: str, encoding: str) -> BeautifulSoup:
    """Download *url* and parse it with the lxml parser.

    The HTTP response is closed as soon as the document has been parsed, so
    crawling many book pages does not leave connections open.

    Args:
        url: Address of the page to download.
        encoding: Value handed to BeautifulSoup's ``from_encoding``.

    Returns:
        The parsed document.
    """
    with urlopen(url) as response:
        return BeautifulSoup(response, "lxml", from_encoding=encoding)


def _meta_content(soup: BeautifulSoup, prop: str):
    """Return the ``content`` of ``<meta property=prop>``, or None if absent."""
    tag = soup.find('meta', {'property': prop})
    return tag.get('content') if tag is not None else None


def collect_book_page_urls(soup: BeautifulSoup) -> list:
    """Return the href of the first link inside every ``div.detail`` element.

    Entries without an anchor or without an ``href`` are skipped instead of
    aborting the whole run.
    """
    urls = []
    for cover in soup.find_all('div', {'class': 'detail'}):
        anchor = cover.select_one('a')
        link = anchor.get('href') if anchor is not None else None
        if link:
            urls.append(link)
    return urls


def scrape_book(rank: int, page_url: str) -> list:
    """Fetch one book page and return its row in ``COLUMNS`` order.

    Fields whose tag is missing from the page are returned as None, so one
    incomplete page does not discard the rows already collected.

    Args:
        rank: 1-based position of the book among the collected links.
        page_url: Address of the book's detail page.
    """
    # NOTE(review): detail pages are decoded as euc-kr while the listing is
    # decoded as utf-8, exactly as before -- confirm against the live site.
    soup = _fetch_soup(page_url, 'euc-kr')
    author_tag = soup.select_one('span.name a')
    return [
        rank,
        _meta_content(soup, 'og:title'),
        author_tag.text if author_tag is not None else None,
        _meta_content(soup, 'og:image'),
        _meta_content(soup, 'og:url'),
        _meta_content(soup, 'og:price'),
    ]


def main() -> None:
    """Crawl the bestseller list, print each book and write ``result2.csv``."""
    listing = _fetch_soup(BESTSELLER_URL, 'utf-8')

    book_info = []
    # A distinct loop name keeps the URL list intact; the original reused the
    # list's own name as the loop variable and clobbered it.
    for rank, page_url in enumerate(collect_book_page_urls(listing), start=1):
        row = scrape_book(rank, page_url)
        print(*row)
        book_info.append(row)

    # Passing ``columns`` to the constructor also works when nothing was
    # scraped; assigning ``data.columns`` afterwards raises on an empty frame.
    data = pd.DataFrame(book_info, columns=COLUMNS)
    # NOTE(review): characters that euc-kr cannot represent will make this
    # call raise UnicodeEncodeError -- the encoding is kept unchanged here.
    data.to_csv('result2.csv', encoding='euc-kr')


if __name__ == "__main__":
    main()
Python
복사
같은 카테고리 글 보기
Search
게시글
Editor
Tags
edit
last edit
K
kim jaehyun
python
2022/01/26
2022/01/26 08:00
K
kim jaehyun
python
2022/01/26
2022/01/26 08:00
K
kim jaehyun
python
2022/01/26
2022/02/05 03:22
K
kim jaehyun
python
2022/01/26
2022/01/26 07:16