Unhandled error in Deferred when using py_cli
lmst2 opened this issue · 1 comments
lmst2 commented
using master branch
使用master分支
Set up using Docker, putting my own Scrapy project in the container, and loading py_cli via a middleware.
使用docker部署,把原来的scrapy项目放入容器中,使用middleware加载py_cli.
The spider works fine outside the container, but raises "Unhandled error in Deferred"
when the middleware is loaded inside it.
爬虫在容器外部完美运行,但是在容器内部在加载我自定义的middleware时出现这个错误,请大家帮忙看一下到底是什么问题
code in middleware
middleware中的代码
import logging

from haipproxy.client import ProxyFetcher
class HaipProxyMiddleware(object):
    """Scrapy downloader middleware that routes requests through haipproxy.

    Fetches a proxy from ``ProxyFetcher`` at start-up, attaches it to every
    outgoing request, and reports success/failure back to the fetcher so
    bad proxies get rotated out.
    """

    def __init__(self):
        # NOTE(review): Redis connection settings are hard-coded; consider
        # reading them from Scrapy settings via a from_crawler classmethod.
        args = dict(host='127.0.0.1', port=6379, password='123456', db=0)
        self.scheme = 'bilibili'
        self.fetcher = ProxyFetcher(self.scheme, strategy='robin', redis_args=args)
        self.proxy = self.get_next_proxy()
        # Fix: the original referenced a module-level `logger` that was never
        # defined, raising NameError on the first request.
        self.logger = logging.getLogger(__name__)

    def process_request(self, request, spider):
        """Attach the current proxy to the outgoing request."""
        self.logger.info("this is request ip:%s", self.proxy)
        request.meta['proxy'] = self.proxy
        return None

    def process_response(self, request, response, spider):
        """Feed proxy quality back to the fetcher; retry on non-200 responses."""
        if response.status != 200:
            self.fetcher.proxy_feedback('failure', self.proxy)
            self.logger.info('Current ip is blocked! The proxy is {}'.format(self.proxy))
            # Rotate to the next proxy and re-schedule the same request.
            self.proxy = self.get_next_proxy()
            request.meta['proxy'] = self.proxy
            # Fix: without dont_filter the retried request may be silently
            # dropped by the scheduler's duplicate filter.
            request.dont_filter = True
            return request
        # If you use the greedy strategy, you must feed back success too.
        spider.logger.info('Request succeeded! The proxy is {}'.format(self.proxy))
        self.fetcher.proxy_feedback('success', self.proxy)
        return response

    def process_exception(self, request, exception, spider):
        """Report the failure and retry the request with a fresh proxy."""
        # Fix: use .get() — meta may lack 'proxy' if the exception fired
        # before process_request attached one.
        self.logger.error('Request failed!The proxy is {}, {}. Exception:{}'.format(
            request.meta.get('proxy'), self.proxy, exception))
        # It's important to feed back, otherwise you may get the bad proxy again.
        self.fetcher.proxy_feedback('failure', self.proxy)
        # Switch the current request to the next proxy.
        self.proxy = self.get_next_proxy()
        request.meta['proxy'] = self.proxy
        # Same dont_filter fix as in process_response.
        request.dont_filter = True
        return request

    def get_next_proxy(self):
        """Return a fresh usable proxy from the fetcher."""
        return self.fetcher.get_proxy()

    def spider_opened(self, spider):
        # Hook this to the spider_opened signal to log spider start-up.
        self.logger.info('Spider opened: %s', spider.name)
lmst2 commented
Issue resolved by changing Twisted==17.9.0
and Scrapy==1.5.0
to Twisted==20.3.0
and Scrapy==2.3.0
in requirements.txt