edgarminers/python-edgar

TypeError: expected string or bytes-like object

manelpardo opened this issue · 1 comment

I am unable to use the newly updated package python-edgar==3.1.3 because it raises a data type error. When running

      1 import edgar
      2 edgar.download_index('../data', 1993, USER_AGENT, skip_all_present_except_last=False)

The following error appears:
1 import edgar
----> 2 edgar.download_index('../data', 1993, USER_AGENT, skip_all_present_except_last=False)

File ~/miniforge3/envs/pruebas/lib/python3.10/site-packages/edgar/main.py:133, in download_index(dest, since_year, user_agent, skip_all_present_except_last)
131 # naive: 200ms or 5QPS serialized
132 start = _get_millis()
--> 133 _download(file, dest, skip_file, user_agent)
134 elapsed = _get_millis() - start
135 if elapsed < REQUEST_BUDGET_MS:

File ~/miniforge3/envs/pruebas/lib/python3.10/site-packages/edgar/main.py:97, in _download(file, dest, skip_file, user_agent)
95 if url.endswith("zip"):
96 with tempfile.TemporaryFile(mode="w+b") as tmp:
---> 97 tmp.write(_url_get(url, user_agent))
98 with zipfile.ZipFile(tmp).open("master.idx") as z:
99 with io.open(dest + dest_name, "w+", encoding="utf-8") as idxfile:

File ~/miniforge3/envs/pruebas/lib/python3.10/site-packages/edgar/main.py:69, in _url_get(url, user_agent)
67 hdr = { 'User-Agent' : user_agent }
68 req = urllib.request.Request(url, headers=hdr)
---> 69 content =urllib.request.urlopen(req).read()
70 else:
71 # python 2
72 import urllib2

File ~/miniforge3/envs/pruebas/lib/python3.10/urllib/request.py:216, in urlopen(url, data, timeout, cafile, capath, cadefault, context)
214 else:
215 opener = _opener
--> 216 return opener.open(url, data, timeout)

File ~/miniforge3/envs/pruebas/lib/python3.10/urllib/request.py:519, in OpenerDirector.open(self, fullurl, data, timeout)
516 req = meth(req)
518 sys.audit('urllib.Request', req.full_url, req.data, req.headers, req.get_method())
--> 519 response = self._open(req, data)
521 # post-process response
522 meth_name = protocol+"_response"

File ~/miniforge3/envs/pruebas/lib/python3.10/urllib/request.py:536, in OpenerDirector._open(self, req, data)
533 return result
535 protocol = req.type
--> 536 result = self._call_chain(self.handle_open, protocol, protocol +
537 '_open', req)
538 if result:
539 return result

File ~/miniforge3/envs/pruebas/lib/python3.10/urllib/request.py:496, in OpenerDirector._call_chain(self, chain, kind, meth_name, *args)
494 for handler in handlers:
495 func = getattr(handler, meth_name)
--> 496 result = func(*args)
497 if result is not None:
498 return result

File ~/miniforge3/envs/pruebas/lib/python3.10/urllib/request.py:1391, in HTTPSHandler.https_open(self, req)
1390 def https_open(self, req):
-> 1391 return self.do_open(http.client.HTTPSConnection, req,
1392 context=self._context, check_hostname=self._check_hostname)

File ~/miniforge3/envs/pruebas/lib/python3.10/urllib/request.py:1348, in AbstractHTTPHandler.do_open(self, http_class, req, **http_conn_args)
1346 try:
1347 try:
-> 1348 h.request(req.get_method(), req.selector, req.data, headers,
1349 encode_chunked=req.has_header('Transfer-encoding'))
1350 except OSError as err: # timeout error
1351 raise URLError(err)

File ~/miniforge3/envs/pruebas/lib/python3.10/http/client.py:1282, in HTTPConnection.request(self, method, url, body, headers, encode_chunked)
1279 def request(self, method, url, body=None, headers={}, *,
1280 encode_chunked=False):
1281 """Send a complete request to the server."""
-> 1282 self._send_request(method, url, body, headers, encode_chunked)

File ~/miniforge3/envs/pruebas/lib/python3.10/http/client.py:1323, in HTTPConnection._send_request(self, method, url, body, headers, encode_chunked)
1320 encode_chunked = False
1322 for hdr, value in headers.items():
-> 1323 self.putheader(hdr, value)
1324 if isinstance(body, str):
1325 # RFC 2616 Section 3.7.1 says that text default has a
1326 # default charset of iso-8859-1.
1327 body = _encode(body, 'body')

File ~/miniforge3/envs/pruebas/lib/python3.10/http/client.py:1259, in HTTPConnection.putheader(self, header, *values)
1256 elif isinstance(one_value, int):
1257 values[i] = str(one_value).encode('ascii')
-> 1259 if _is_illegal_header_value(values[i]):
1260 raise ValueError('Invalid header value %r' % (values[i],))
1262 value = b'\r\n\t'.join(values)

TypeError: expected string or bytes-like object

Other info:

  • OS: macOS
  • Chip: M1 Pro
  • Environment: miniforge3
  • python 3.10
  • python-edgar 3.1.3

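I ran your code with python-edgar 3.1.3 on Python 3.8.9 on my M1 Pro without issues. I do not use miniforge3 myself, so I cannot help you with that part. Note, however, that the traceback fails inside `HTTPConnection.putheader` while validating the `User-Agent` header value, which suggests that the `USER_AGENT` you pass is not a string (for example, it may be `None` or a tuple); please check that it is a plain `str`.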