Python3 Scrapy框架中进入callback函数，但第一级取的值存入数据库中都重复 - 知否问答 - 万象云+社区

-- coding: utf-8 --

import scrapy,re
from scrapy.selector import Selector
from scrapy.http import Request
from novelspider.items import NovelspiderItem
import re

class novelSpider(scrapy.Spider):

name = 'novelSpider' allowed_domains = ["www.tibetif.com"] url = "http://www.tibetif.com/index.php?m=content&c=index&a=lists&catid=955" start_urls = [url] def parse(self, response): item = NovelspiderItem() list = response.xpath(".//table[@class='proinfo']/tbody/tr") for i in range(2,len(list)): item['projectNo'] = list[i].xpath(".//td/text()").extract()[0].strip() item['url'] = list[i].xpath(".//td[2]/a/@href").extract()[0] item['title'] = list[i].xpath(".//td[2]/a/text()").extract()[0] item['money'] = list[i].xpath(".//td[4]/text()").extract()[0] item['date'] = list[i].xpath(".//td[5]/text()").extract()[0] yield scrapy.Request(item['url'],meta={'item':item},callback=self.detail) def detail(self,response): item = response.meta['item'] selector = Selector(response) text = selector.xpath(".//div[@class='wrapper']/table[3]").xpath('string(.)').extract()[0].replace("\n"," ").replace("\t"," ").replace("\r"," ").strip('\n').strip('\t').strip('\r').lstrip().rstrip() print(item['title']) companyObj = re.search(r'标的企业名称\s*(\S+)',text) if companyObj: item['company'] = str(companyObj.group(1)) priceObj = re.search(r'注册资本\(元\)\s*(\S+)',text) if priceObj: item['price'] = str(priceObj.group(1)) return item

问题：Python3 Scrapy框架中不进入yield scrapy.Request(item['url'],meta={'item':item},callback=self.detail)中的callback=self.detail函数，导致数据库中在parse函数中取得的值都是最后一个页面的值

回答动态