Spider fails on redirect with non-ASCII byte (0xa0) in Location header
lorien opened this issue · 0 comments
lorien commented
Fails on Python 2 with UnicodeDecodeError (see log below); works on Python 3.
def test_redirect_with_invalid_byte(self):
    """Run a spider against a 301 whose Location holds a raw 0xa0 byte.

    The test server is told to answer with status 301 and a ``Location``
    header built from ``b'http://\\xa0'`` followed by the server's own URL.
    The test makes no assertions: it passes when ``bot.run()`` returns
    without raising.
    """
    target_url = self.server.get_url()
    # Malformed redirect target: scheme, one non-ASCII byte, then the URL.
    bad_location = b'http://\xa0' + target_url.encode('ascii')

    def callback(server):
        # Respond with a 301 pointing at the malformed location, empty body.
        server.set_status(301)
        server.add_header('Location', bad_location)
        server.write('')
        server.finish()

    self.server.response['callback'] = callback

    class TestSpider(Spider):
        def task_generator(self):
            # NOTE(review): earlier drafts of this reproduction requested
            # http://www.tripadvisor.com/ShowUrl?&excludeFromVS=false
            # &odc=BusinessListingsUrl&d=4289178&url=1 or the malformed URL
            # directly; presumably the tripadvisor link is where the problem
            # was first seen -- confirm before relying on it.
            yield Task('page', target_url)

        def task_page(self, grab, task):
            # The response content is irrelevant; only the redirect matters.
            pass

    bot = build_spider(TestSpider)
    bot.run()
Log:
Traceback (most recent call last):
File "/home/lorien/web/grab/grab/spider/base_service.py", line 32, in wrapper
callback(*args, **kwargs)
File "/home/lorien/web/grab/grab/spider/network_service/threaded.py", line 78, in worker_callback
grab.request()
File "/home/lorien/web/grab/grab/base.py", line 481, in request
referer=None)
File "/home/lorien/web/grab/grab/base.py", line 410, in prepare_request
self.transport.process_config(self)
File "/home/lorien/web/grab/grab/transport/urllib3.py", line 132, in process_config
u'%s: %s' % (six.text_type(ex), grab.config['url']))
UnicodeDecodeError: 'ascii' codec can't decode byte 0xa0 in position 7: ordinal not in range(128)
E....
======================================================================
ERROR: test_redirect_with_invalid_byte (tests.spider_error.SpiderErrorTestCase)
----------------------------------------------------------------------
Traceback (most recent call last):
File "/home/lorien/web/grab/tests/spider_error.py", line 80, in test_redirect_with_invalid_byte
bot.run()
File "/home/lorien/web/grab/grab/spider/base.py", line 693, in run
raise exc_info[1]
UnicodeDecodeError: 'ascii' codec can't decode byte 0xa0 in position 7: ordinal not in range(128)
----------------------------------------------------------------------