lorien/grab

urllib3 tests fail with UnicodeError: 'idna' codec failed

lorien opened this issue · 0 comments

https://travis-ci.org/lorien/grab/jobs/371813278#

It fails only on Python 3.

The URL is http://13354&altProductId=6423589&productId=6423589&altProductStoreId=13713&catalogId=10001&categoryId=28678&productStoreId=13713http://www.textbooksnow.com/webapp/wcs/stores/servlet/ProductDisplay?langId=-1&storeId=

ERROR: test_generator_with_invalid_url (tests.spider_error.SpiderErrorTestCase)
----------------------------------------------------------------------
Traceback (most recent call last):
  File "/home/travis/build/lorien/grab/.tox/py34-threaded-urllib3/lib/python3.4/encodings/idna.py", line 167, in encode
    raise UnicodeError("label too long")
UnicodeError: label too long
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
  File "/home/travis/build/lorien/grab/tests/spider_error.py", line 29, in test_generator_with_invalid_url
    bot.run()
  File "/home/travis/build/lorien/grab/grab/spider/base.py", line 693, in run
    raise exc_info[1]
  File "/home/travis/build/lorien/grab/grab/spider/base_service.py", line 32, in wrapper
    callback(*args, **kwargs)
  File "/home/travis/build/lorien/grab/grab/spider/network_service/threaded.py", line 76, in worker_callback
    grab.request()
  File "/home/travis/build/lorien/grab/grab/base.py", line 461, in request
    self.transport.request()
  File "/home/travis/build/lorien/grab/grab/transport/urllib3.py", line 287, in request
    preload_content=False)
  File "/home/travis/build/lorien/grab/.tox/py34-threaded-urllib3/lib/python3.4/site-packages/urllib3/poolmanager.py", line 321, in urlopen
    response = conn.urlopen(method, u.request_uri, **kw)
  File "/home/travis/build/lorien/grab/.tox/py34-threaded-urllib3/lib/python3.4/site-packages/urllib3/connectionpool.py", line 601, in urlopen
    chunked=chunked)
  File "/home/travis/build/lorien/grab/.tox/py34-threaded-urllib3/lib/python3.4/site-packages/urllib3/connectionpool.py", line 357, in _make_request
    conn.request(method, url, **httplib_request_kw)
  File "/usr/lib/python3.4/http/client.py", line 1125, in request
    self._send_request(method, url, body, headers)
  File "/usr/lib/python3.4/http/client.py", line 1163, in _send_request
    self.endheaders(body)
  File "/usr/lib/python3.4/http/client.py", line 1121, in endheaders
    self._send_output(message_body)
  File "/usr/lib/python3.4/http/client.py", line 951, in _send_output
    self.send(msg)
  File "/usr/lib/python3.4/http/client.py", line 886, in send
    self.connect()
  File "/home/travis/build/lorien/grab/.tox/py34-threaded-urllib3/lib/python3.4/site-packages/urllib3/connection.py", line 166, in connect
    conn = self._new_conn()
  File "/home/travis/build/lorien/grab/.tox/py34-threaded-urllib3/lib/python3.4/site-packages/urllib3/connection.py", line 141, in _new_conn
    (self.host, self.port), self.timeout, **extra_kw)
  File "/home/travis/build/lorien/grab/.tox/py34-threaded-urllib3/lib/python3.4/site-packages/urllib3/util/connection.py", line 60, in create_connection
    for res in socket.getaddrinfo(host, port, family, socket.SOCK_STREAM):
  File "/usr/lib/python3.4/socket.py", line 533, in getaddrinfo
    for res in _socket.getaddrinfo(host, port, family, type, proto, flags):
UnicodeError: encoding with 'idna' codec failed (UnicodeError: label too long)

Unit test code to reproduce the low-level exception:

import six

from tests.util import BaseGrabTestCase, only_grab_transport


class GrabApiTestCase(BaseGrabTestCase):
    """Reproduce the low-level urllib3 error raised for a malformed URL."""

    def setUp(self):
        """Call ``self.server.reset()`` before each test."""
        self.server.reset()

    @only_grab_transport('urllib3')
    def test_urllib3_idna_error(self):
        # Issue a plain urllib3 GET for a malformed URL and check which
        # exception class surfaces: UnicodeError when six.PY3 is true,
        # urllib3's NewConnectionError otherwise.
        #
        # urllib3 is imported here rather than at module level, so the
        # module can still be imported when urllib3 is not installed.
        from urllib3 import PoolManager
        from urllib3.exceptions import NewConnectionError

        # Two URLs glued together; everything between the first "http://"
        # and the next "/" ends up being treated as the host name.
        url_fragments = (
            'http://13354&altProductId=6423589&productId=6423589',
            '&altProductStoreId=13713&catalogId=10001',
            '&categoryId=28678&productStoreId=13713',
            'http://www.textbooksnow.com/webapp/wcs/stores',
            '/servlet/ProductDisplay?langId=-1&storeId=',
        )
        malformed_url = ''.join(url_fragments)

        if six.PY3:
            expected_error = UnicodeError
        else:
            expected_error = NewConnectionError

        manager = PoolManager()
        # retries=False makes urllib3 re-raise the first failure instead of
        # retrying and wrapping it in MaxRetryError.
        with self.assertRaises(expected_error):
            manager.request('GET', malformed_url, retries=False)