-
카테고리
-
세부 분야
데이터 엔지니어링
두번째 Request의 callback 부분이 작동을 안합니다..
19.10.08 14:42 작성 조회수 16
0
삭제된 글입니다
답변을 작성해보세요.
0
Seungwoon Song
질문자2019.10.08
# -*- coding: utf-8 -*-
import scrapy
class GmarketCategoryAllSpider(scrapy.Spider):
name = 'gmarket_category_all'
def start_requests(self):
yield scrapy.Request(url='http://corners.gmarket.co.kr/Bestsellers', callback = self.parse)
def parse(self, response):
print("parse1")
category_links = response.css('#categoryTabG li a ::attr(href)').getall()
category_names = response.css('#categoryTabG li a ::text').getall()
for index, category_link in enumerate(category_links):
yield scrapy.Request(url = 'http://corners.gmarket.co.kr/'+category_link, callback = self.parse_item, meta = {'category_name': category_names[index]})
#이런식으로 Recursive하게 작동시킬 수 있다.
for index, category_link in enumerate(category_links):
yield scrapy.Request(url = 'http://corners.gmarket.co.kr/'+category_link, callback = self.parse_subcategory, meta = {'category_name': category_names[index]})
def parse_subcategory(self, response):
print("sub category 진입")
subcategory_links = response.css('div.navi.group > ul > li::attr(href)').getall()
subcategory_texts = response.css('div.navu.group > ul > li::text').getall()
for index,subcategory_link in enumerate(subcategory_links):
yield scrapy.Request(url='http://corners.gmarket.co.kr'+subcategory_link, callback=self.parse_item, meta={'category_name': 'subcategory'+subcategory_texts[index]} )
def parse_item(self,response):
print("parse_item", response.meta['category_name'])
page_item = response.css('div.best-list')
for index,item in enumerate(page_item[1].css('li')):
rank = index + 1
title = item.css('a.itemname::text').get()
origin_price = item.css('div.o-price span span::text').get()
sale_price = item.css('div.s-price strong span span::text').get()
percentage = item.css('div.s-price em::text').get()
if(origin_price == None):
origin_price = sale_price
if(percentage == None):
percentage = '0'
else:
percentage = percentage.replace("%", "")
origin_price = origin_price.replace("원","").replace(",","")
sale_price = sale_price.replace("원", "").replace(",", "")
# print(rank, title, origin_price, sale_price, percentage)
전체 코드는 위처럼 짰습니다.
제가 놓친 부분이 있을까요?
답변 1