根据现有代码修改,异步存储测试通过
hebgaoyan opened this issue · 2 comments
hebgaoyan commented
from twisted.internet import defer,reactor
class MyspiderPipeline(object):
    """Item pipeline that writes each item to MongoDB from a worker thread.

    The class exposes ``process_item`` / ``close_spider`` hooks (it appears
    to be a Scrapy item pipeline -- confirm against the project settings).
    The blocking pymongo call runs via ``reactor.callInThread`` so the
    Twisted reactor thread is not blocked while an insert is in progress.
    """

    # Seconds to sleep in the worker thread before every insert. The original
    # code hard-coded ``time.sleep(10)``; the default is kept for identical
    # behaviour. NOTE(review): this looks like a leftover used to demonstrate
    # that storage is asynchronous -- set to 0 for real crawls.
    insert_delay = 10

    def __init__(self):
        """Open the MongoDB connection and select the target collection.

        Reads ``MONGODB_HOST``, ``MONGODB_PORT``, ``MONGODB_DBNAME`` and
        ``MONGODB_DOCNAME`` from the module-level ``settings`` mapping.
        """
        # This method was spelled ``init`` in the source (the double
        # underscores were lost), so it never ran on construction and
        # ``self.client`` / ``self.post`` were never set.
        host = settings['MONGODB_HOST']
        port = settings['MONGODB_PORT']
        dbname = settings['MONGODB_DBNAME']
        docname = settings['MONGODB_DOCNAME']
        self.client = pymongo.MongoClient(host=host, port=port)
        db = self.client[dbname]
        self.post = db[docname]

    def close_spider(self, spider):
        """Close the MongoDB client."""
        self.client.close()

    # The part below is the key point: the insert is handed to a thread and
    # the pipeline waits on a Deferred instead of blocking.
    @defer.inlineCallbacks
    def process_item(self, item, spider):
        """Schedule ``item`` for insertion and yield until the worker is done.

        Returns a Deferred (via ``inlineCallbacks``) that fires with ``item``
        once ``_insert`` has signalled completion.
        """
        out = defer.Deferred()
        reactor.callInThread(self._insert, item, out, spider)
        yield out
        # ``returnValue`` raises to deliver the result, so nothing after it
        # can run; the former trailing ``return item`` was unreachable.
        defer.returnValue(item)

    def _insert(self, item, out, spider):
        """Insert ``item`` into the collection; runs in a worker thread.

        ``out`` is always fired with ``item`` (scheduled on the reactor
        thread), whether or not the insert succeeded, so ``process_item``
        can never hang on a failed write.
        """
        if self.insert_delay:
            time.sleep(self.insert_delay)
        try:
            data = dict(item)
            # NOTE(review): ``Collection.insert`` is deprecated in PyMongo 3
            # and removed in PyMongo 4 (``insert_one`` replaces it) -- confirm
            # the installed version before switching.
            self.post.insert(data)
        except pymongo.errors.DuplicateKeyError:
            # Same unique index value means the record is a duplicate; this
            # is expected and only worth a debug line.
            spider.logger.debug('duplicate key error collection')
        except Exception:
            # Still best-effort (the item is passed on), but no longer
            # mislabelled as a duplicate and no longer silent.
            spider.logger.exception('failed to insert item into MongoDB')
        finally:
            # Deferreds must be fired from the reactor thread, exactly once.
            reactor.callFromThread(out.callback, item)
GoldenNotebook commented
hebgaoyan commented
我是直接按项目 demo 跑的,没有具体分析过。看代码里,案件列表的请求 URL 是:http://wenshu.court.gov.cn/List/ListContent ,没有考虑后面那一串。