Gmarket 크롤링 실습문제

안녕하세요.

공유드린 자료의 코드를 기반으로, 크롤링 예제를 보여드리고 있는데요.

해당 영상에 첨부드린 crawling_crawling.ipynb 코드와 지금 공유해주신 코드가 달라 보여요.

첨부해드리고 영상에서 설명드린 코드는 정상적으로 동작해서요. 혹시 제가 착각하는 것이라면 새로운 질문으로 올려주시면 되겠지만, 카테고리도 달라 보이고 (즉, 다른 링크로 크롤링을 하신 듯하고), CSS selector도 강의에서 설명드린 CSS selector와 달라 보입니다. 확인 부탁드려요. 여기까지 들으셨다면, 성공하신 것 같아요. 감사합니다.

import requests

from bs4 import BeautifulSoup

# Download the Gmarket "Bestsellers" page for groupCode=G06 and parse the HTML.
res = requests.get('http://corners.gmarket.co.kr/Bestsellers?viewType=G&groupCode=G06')
soup = BeautifulSoup(res.content, 'html.parser')

# Index 1 selects the second <div class="best-list"> on the page
# (assumes the page has at least two such divs -- an IndexError is raised
# otherwise).
bestlists = soup.select('div.best-list')
bestitems = bestlists[1]
products = bestitems.select('ul > li')

# Print the name, price and link of every product entry in that list.
for product in products:
    title = product.select_one('a.itemname')
    price = product.select_one('div.s-price > strong')
    # select_one() returns None when the selector matches nothing; skip such
    # entries instead of failing on None.get_text().
    if title is None or price is None:
        continue
    print(title.get_text(), price.get_text(), title['href'])

import requests

from bs4 import BeautifulSoup

import re # 2020.07.25 업데이트 (지마켓 일부 상품 태그 변경, 공지사항 참조부탁드림)

# Updated 2020-07-25: some Gmarket product tags changed (see the course notice).
# Only product links that start with "http://" are followed.
# NOTE(review): links starting with "https://" do not match this pattern and
# are therefore skipped -- confirm that this is intended.
link_re = re.compile('^http://')

# Download the Gmarket "Bestsellers" page for groupCode=G06 and parse the HTML.
res = requests.get('http://corners.gmarket.co.kr/Bestsellers?viewType=G&groupCode=G06')
soup = BeautifulSoup(res.content, 'html.parser')

# Index 1 selects the second <div class="best-list"> on the page
# (assumes the page has at least two such divs -- an IndexError is raised
# otherwise).
bestlists = soup.select('div.best-list')
bestitems = bestlists[1]
products = bestitems.select('ul > li')

# For each product, open its detail page and print name, price, link and the
# text found in the top-info headline of that page.
for product in products:
    title = product.select_one('a.itemname')
    price = product.select_one('div.s-price > strong')
    # select_one() returns None when the selector matches nothing; skip such
    # entries instead of failing on None.
    if title is None or price is None:
        continue

    # A missing href is treated like a non-matching link and skipped.
    href = title.get('href', '')
    if not link_re.match(href):
        continue

    res_info = requests.get(href)
    soup_info = BeautifulSoup(res_info.content, 'html.parser')
    provider_info = soup_info.select_one('div.item-topinfo > div.item-topinfo_headline > p > a > strong')
    # The detail page may not contain the headline element; print an empty
    # provider field in that case rather than crashing.
    provider_name = provider_info.get_text() if provider_info is not None else ''
    print(title.get_text(), price.get_text(), href, provider_name)

인프런 커뮤니티 질문&답변