john-kurkowski/tldextract

Bug in cache

jordane95 opened this issue · 3 comments

File "/opt/conda/envs/datatrove/lib/python3.10/site-packages/urllib3/connectionpool.py", line 467, in _make_request
KeyError: "namespace: publicsuffix.org-tlds key: {'urls': ('https://publicsuffix.org/list/public_suffix_list.dat', 'https://raw.githubusercontent.com/publicsuffix/list/master/public_suffix_list.dat'), 'fallback_to_snapshot': True}"
Traceback (most recent call last):
raise new_e
The above exception was the direct cause of the following exception:
return self.run_and_cache(
During handling of the above exception, another exception occurred:
File "/opt/conda/envs/datatrove/lib/python3.10/site-packages/requests/sessions.py", line 703, in send
File "/opt/conda/envs/datatrove/lib/python3.10/site-packages/tldextract/cache.py", line 112, in get
During handling of the above exception, another exception occurred:
self._validate_conn(conn)
File "/opt/conda/envs/datatrove/lib/python3.10/site-packages/requests/sessions.py", line 602, in get
File "/opt/conda/envs/datatrove/lib/python3.10/site-packages/tldextract/cache.py", line 112, in get
File "/opt/conda/envs/datatrove/lib/python3.10/site-packages/tldextract/cache.py", line 210, in run_and_cache
File "/opt/conda/envs/datatrove/lib/python3.10/site-packages/requests/adapters.py", line 486, in send
Traceback (most recent call last):
sock = connection.create_connection(
File "/opt/conda/envs/datatrove/lib/python3.10/site-packages/urllib3/connectionpool.py", line 491, in _make_request
raise ConnectionError(e, request=request)
File "/opt/conda/envs/datatrove/lib/python3.10/site-packages/requests/sessions.py", line 589, in request
raise KeyError("namespace: " + namespace + " key: " + repr(key))
resp = self.send(prep, **send_kwargs)
raise MaxRetryError(_pool, url, reason) from reason # type: ignore[arg-type]
response = session.get(url, timeout=timeout)
response = self._make_request(
File "/opt/conda/envs/datatrove/lib/python3.10/site-packages/urllib3/connection.py", line 198, in _new_conn
Traceback (most recent call last):
return self.request("GET", url, **kwargs)
conn.connect()
for res in socket.getaddrinfo(host, port, family, socket.SOCK_STREAM):
During handling of the above exception, another exception occurred:
retries = retries.increment(
File "/opt/conda/envs/datatrove/lib/python3.10/site-packages/urllib3/connection.py", line 205, in _new_conn
urllib3.exceptions.NameResolutionError: <urllib3.connection.HTTPSConnection object at 0x...>: Failed to resolve 'publicsuffix.org' ([Errno -3] Temporary failure in name resolution)
Traceback (most recent call last):
The above exception was the direct cause of the following exception:
raise NameResolutionError(self.host, self, e) from e
File "/opt/conda/envs/datatrove/lib/python3.10/socket.py", line 955, in getaddrinfo
File "/opt/conda/envs/datatrove/lib/python3.10/site-packages/urllib3/connectionpool.py", line 1099, in _validate_conn
KeyError: "namespace: urls key: {'url': 'https://publicsuffix.org/list/public_suffix_list.dat'}"
result = cast(T, self.get(namespace=namespace, key=key_args))
result = cast(T, self.get(namespace=namespace, key=key_args))
Exception reading Public Suffix List url https://publicsuffix.org/list/public_suffix_list.dat
r = adapter.send(request, **kwargs)
result = func(**kwargs)
File "/opt/conda/envs/datatrove/lib/python3.10/site-packages/tldextract/cache.py", line 212, in run_and_cache
File "/opt/conda/envs/datatrove/lib/python3.10/site-packages/tldextract/suffix_list.py", line 46, in find_first_response
raise KeyError("namespace: " + namespace + " key: " + repr(key))
File "/opt/conda/envs/datatrove/lib/python3.10/site-packages/urllib3/connectionpool.py", line 847, in urlopen
urllib3.exceptions.MaxRetryError: HTTPSConnectionPool(host='publicsuffix.org', port=443): Max retries exceeded with url: /list/public_suffix_list.dat (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x...>: Failed to resolve 'publicsuffix.org' ([Errno -3] Temporary failure in name resolution)"))
self.sock = sock = self._new_conn()
Traceback (most recent call last):
for res in _socket.getaddrinfo(host, port, family, type, proto, flags):
File "/opt/conda/envs/datatrove/lib/python3.10/site-packages/tldextract/cache.py", line 210, in run_and_cache
socket.gaierror: [Errno -3] Temporary failure in name resolution
Traceback (most recent call last):
resp = conn.urlopen(
requests.exceptions.ConnectionError: HTTPSConnectionPool(host='publicsuffix.org', port=443): Max retries exceeded with url: /list/public_suffix_list.dat (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x...>: Failed to resolve 'publicsuffix.org' ([Errno -3] Temporary failure in name resolution)"))
File "/opt/conda/envs/datatrove/lib/python3.10/site-packages/tldextract/cache.py", line 230, in _fetch_url
File "/opt/conda/envs/datatrove/lib/python3.10/site-packages/urllib3/connectionpool.py", line 793, in urlopen
File "/opt/conda/envs/datatrove/lib/python3.10/site-packages/urllib3/util/connection.py", line 60, in create_connection
File "/opt/conda/envs/datatrove/lib/python3.10/site-packages/urllib3/util/retry.py", line 515, in increment
File "/opt/conda/envs/datatrove/lib/python3.10/site-packages/urllib3/connection.py", line 616, in connect
File "/opt/conda/envs/datatrove/lib/python3.10/site-packages/tldextract/cache.py", line 221, in cached_fetch_url
File "/opt/conda/envs/datatrove/lib/python3.10/site-packages/requests/adapters.py", line 519, in send

requests.exceptions.ConnectionError: HTTPSConnectionPool(host='publicsuffix.org', port=443): Max retries exceeded with url: /list/public_suffix_list.dat (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x...>: Failed to resolve 'publicsuffix.org' ([Errno -3] Temporary failure in name resolution)"))

Is your firewall blocking publicsuffix.org?

root@j-x6pir00f20-master-0:/# ping publicsuffix.org
ping: unknown host

Basically, it was my own network issue.