Commit 3e313ffb authored by jackfrued

更新了爬虫部分代码

parent 98dc244c
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
from io import StringIO
from urllib.parse import urlencode from urllib.parse import urlencode
import re
import scrapy import scrapy
...@@ -26,6 +28,9 @@ class TaobaoSpider(scrapy.Spider): ...@@ -26,6 +28,9 @@ class TaobaoSpider(scrapy.Spider):
item = GoodsItem() item = GoodsItem()
item['price'] = goods.xpath('div[5]/div[2]/div[1]/div[1]/strong/text()').extract_first() item['price'] = goods.xpath('div[5]/div[2]/div[1]/div[1]/strong/text()').extract_first()
item['deal'] = goods.xpath('div[5]/div[2]/div[1]/div[2]/text()').extract_first() item['deal'] = goods.xpath('div[5]/div[2]/div[1]/div[2]/text()').extract_first()
item['title'] = goods.xpath('div[6]/div[2]/div[2]/a/text()').extract_first() segments = goods.xpath('div[6]/div[2]/div[2]/a/text()').extract()
title = StringIO()
for segment in segments:
title.write(re.sub('\s', '', segment))
item['title'] = title.getvalue()
yield item yield item
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment