Raffaelbdl/jpdb-anki

APKG Generation Error

Closed this issue · 1 comments

Thanks for this tool! Super excited to get this working. Ran into an error here:

jpdb-anki % python -m jpdb_anki -t scrape -vl https://jpdb.io/anime/27/shiki/vocabulary-list Creating new list shiki_vocabulary-list Traceback (most recent call last): File "/Users/doinkmachine/miniconda3/lib/python3.10/runpy.py", line 196, in _run_module_as_main return _run_code(code, main_globals, None, File "/Users/doinkmachine/miniconda3/lib/python3.10/runpy.py", line 86, in _run_code exec(code, run_globals) File "/Users/doinkmachine/jpdb-anki/jpdb_anki/__main__.py", line 61, in <module> app.run(main) File "/Users/doinkmachine/miniconda3/lib/python3.10/site-packages/absl/app.py", line 308, in run _run_main(main, args) File "/Users/doinkmachine/miniconda3/lib/python3.10/site-packages/absl/app.py", line 254, in _run_main sys.exit(main(argv)) File "/Users/doinkmachine/jpdb-anki/jpdb_anki/__main__.py", line 50, in main vocab_entries = db.get_list(FLAGS.vocablist) File "/Users/doinkmachine/jpdb-anki/jpdb_anki/database.py", line 111, in get_list vocab = get_all_vocab_entries(url) File "/Users/doinkmachine/jpdb-anki/jpdb_anki/scraping.py", line 31, in get_all_vocab_entries vocab_entries += get_all_vocab_entries(base_url + next_root) File "/Users/doinkmachine/jpdb-anki/jpdb_anki/scraping.py", line 31, in get_all_vocab_entries vocab_entries += get_all_vocab_entries(base_url + next_root) File "/Users/doinkmachine/jpdb-anki/jpdb_anki/scraping.py", line 31, in get_all_vocab_entries vocab_entries += get_all_vocab_entries(base_url + next_root) [Previous line repeated 35 more times] File "/Users/doinkmachine/jpdb-anki/jpdb_anki/scraping.py", line 30, in get_all_vocab_entries next_root = jpdb.find(class_="pagination").find_all("a", href=True)[-1]["href"][:-2] AttributeError: 'NoneType' object has no attribute 'find_all'

Tried it with the example vocab list and got this error:

`python -m jpdb_anki -t scrape -vl https://jpdb.io/novel/5829/kuma-kuma-kuma-bear/vocabulary-list
Creating new list kuma-kuma-kuma-bear_vocabulary-list
Traceback (most recent call last):
File "/Users/doinkmachine/miniconda3/lib/python3.10/site-packages/urllib3/connectionpool.py", line 703, in urlopen
httplib_response = self._make_request(
File "/Users/doinkmachine/miniconda3/lib/python3.10/site-packages/urllib3/connectionpool.py", line 386, in _make_request
self._validate_conn(conn)
File "/Users/doinkmachine/miniconda3/lib/python3.10/site-packages/urllib3/connectionpool.py", line 1042, in _validate_conn
conn.connect()
File "/Users/doinkmachine/miniconda3/lib/python3.10/site-packages/urllib3/connection.py", line 414, in connect
self.sock = ssl_wrap_socket(
File "/Users/doinkmachine/miniconda3/lib/python3.10/site-packages/urllib3/util/ssl_.py", line 449, in ssl_wrap_socket
ssl_sock = ssl_wrap_socket_impl(
File "/Users/doinkmachine/miniconda3/lib/python3.10/site-packages/urllib3/util/ssl_.py", line 493, in _ssl_wrap_socket_impl
return ssl_context.wrap_socket(sock, server_hostname=server_hostname)
File "/Users/doinkmachine/miniconda3/lib/python3.10/ssl.py", line 513, in wrap_socket
return self.sslsocket_class._create(
File "/Users/doinkmachine/miniconda3/lib/python3.10/ssl.py", line 1071, in _create
self.do_handshake()
File "/Users/doinkmachine/miniconda3/lib/python3.10/ssl.py", line 1342, in do_handshake
self._sslobj.do_handshake()
ssl.SSLEOFError: EOF occurred in violation of protocol (_ssl.c:997)

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
File "/Users/doinkmachine/miniconda3/lib/python3.10/site-packages/requests/adapters.py", line 489, in send
resp = conn.urlopen(
File "/Users/doinkmachine/miniconda3/lib/python3.10/site-packages/urllib3/connectionpool.py", line 787, in urlopen
retries = retries.increment(
File "/Users/doinkmachine/miniconda3/lib/python3.10/site-packages/urllib3/util/retry.py", line 592, in increment
raise MaxRetryError(_pool, url, error or ResponseError(cause))
urllib3.exceptions.MaxRetryError: HTTPSConnectionPool(host='jpdb.io', port=443): Max retries exceeded with url: /novel/5829/kuma-kuma-kuma-bear/vocabulary-list (Caused by SSLError(SSLEOFError(8, 'EOF occurred in violation of protocol (_ssl.c:997)')))

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
File "/Users/doinkmachine/miniconda3/lib/python3.10/runpy.py", line 196, in _run_module_as_main
return _run_code(code, main_globals, None,
File "/Users/doinkmachine/miniconda3/lib/python3.10/runpy.py", line 86, in _run_code
exec(code, run_globals)
File "/Users/doinkmachine/jpdb-anki/jpdb_anki/__main__.py", line 61, in <module>
app.run(main)
File "/Users/doinkmachine/miniconda3/lib/python3.10/site-packages/absl/app.py", line 308, in run
_run_main(main, args)
File "/Users/doinkmachine/miniconda3/lib/python3.10/site-packages/absl/app.py", line 254, in _run_main
sys.exit(main(argv))
File "/Users/doinkmachine/jpdb-anki/jpdb_anki/__main__.py", line 50, in main
vocab_entries = db.get_list(FLAGS.vocablist)
File "/Users/doinkmachine/jpdb-anki/jpdb_anki/database.py", line 111, in get_list
vocab = get_all_vocab_entries(url)
File "/Users/doinkmachine/jpdb-anki/jpdb_anki/scraping.py", line 23, in get_all_vocab_entries
jpdb = load_url(root_url)
File "/Users/doinkmachine/jpdb-anki/jpdb_anki/scraping.py", line 8, in load_url
return BeautifulSoup(requests.get(url).content, "html.parser")
File "/Users/doinkmachine/miniconda3/lib/python3.10/site-packages/requests/api.py", line 73, in get
return request("get", url, params=params, **kwargs)
File "/Users/doinkmachine/miniconda3/lib/python3.10/site-packages/requests/api.py", line 59, in request
return session.request(method=method, url=url, **kwargs)
File "/Users/doinkmachine/miniconda3/lib/python3.10/site-packages/requests/sessions.py", line 587, in request
resp = self.send(prep, **send_kwargs)
File "/Users/doinkmachine/miniconda3/lib/python3.10/site-packages/requests/sessions.py", line 701, in send
r = adapter.send(request, **kwargs)
File "/Users/doinkmachine/miniconda3/lib/python3.10/site-packages/requests/adapters.py", line 563, in send
raise SSLError(e, request=request)
requests.exceptions.SSLError: HTTPSConnectionPool(host='jpdb.io', port=443): Max retries exceeded with url: /novel/5829/kuma-kuma-kuma-bear/vocabulary-list (Caused by SSLError(SSLEOFError(8, 'EOF occurred in violation of protocol (_ssl.c:997)')))
`

Hi !
For the second issue, it might simply be a transient problem with your own system or with JPDB itself. Please try again later (after making the modification below).

For the first issue, can you try again after adding the following lines in scraping.py between lines 29 and 31:

    without_prev = jpdb.find(class_="pagination without-prev")
    if without_prev:
        next_root = without_prev.find_all("a", href=True)[-1]["href"][:-2]
        vocab_entries += get_all_vocab_entries(base_url + next_root)
        return vocab_entries

I was able to create the deck even without these lines, but perhaps your miniconda environment parses the HTML and locates elements differently. The lines above handle the specific case of the first page of the vocabulary list, which uses a different pagination markup (class `pagination without-prev`) than the subsequent pages.