asyncio.queues.QueueFull Exception broke sync.
cburgdorf opened this issue · 2 comments
cburgdorf commented
What is wrong?
This was originally reported in #2019 and surfaced by sync making no further progress. Upon further inspection of the logfile, this appears to have happened right before the sync got stuck.
DEBUG 2020-09-03 21:51:33,790 SkeletonSyncer Skeleton sync with ETHPeer (eth, 63) <Session <Node(0x1fbb02@34.239.157.14)> a55e806f-fd96-4963-8e5f-73243d2a6820> ended
DEBUG 2020-09-03 21:51:33,791 SkeletonSyncer Skeleton syncer had 0 pending headers when it was cancelled
DEBUG 2020-09-03 21:51:33,789 MuirGlacierVM Beam pivot over 1 txn preview for <BlockHeader #10790870 55f0d7cd> b/c StateUnretrievable('No servers for CollectMissingAccount') after 0.0s, %exec 100, stats: BeamStat: accts=0, a_nodes=0, codes=0, strg=0, s_nodes=0, nodes=0, rtt=0.000s, wait=0s
�[1m�[31m ERROR 2020-09-03 21:51:33,796 asyncio Exception in callback <TaskWakeupMethWrapper object at 0x7fa4481cd710>
handle: <Handle <TaskWakeupMethWrapper object at 0x7fa4481cd710>>
Traceback (most recent call last):
File "uvloop/cbhandles.pyx", line 70, in uvloop.loop.Handle._run
File "/usr/local/lib/python3.7/site-packages/async_service/asyncio.py", line 122, in _handle_cancelled
await self._real_handle_cancelled()
File "/usr/local/lib/python3.7/site-packages/async_service/asyncio.py", line 173, in _real_handle_cancelled
await asyncio_task
File "uvloop/cbhandles.pyx", line 70, in uvloop.loop.Handle._run
File "/usr/local/lib/python3.7/site-packages/async_service/base.py", line 300, in _run_and_manage_task
await task.run()
File "/usr/local/lib/python3.7/site-packages/async_service/base.py", line 165, in run
await self.child_manager.run()
File "/usr/local/lib/python3.7/site-packages/async_service/asyncio.py", line 246, in run
for _, exc_value, exc_tb in self._errors
trio.MultiError: QueueFull("TaskQueue unsuccessful in adding task HexBytes('0xc20b1fd0d561fd36fc89a13e3c43454c7056ac697ffacd1443a0b2cec5a1a365') ", 'because qsize=6144, num_tasks=6114, maxsize=6144, open_slots=30, num queueing=1, len(_tasks)=6114, task_idx=0, queuing=(<trinity._utils.datastructures.PredefinedSortableTask object at 0x7fa4281be9d0>,), original msg: '), QueueFull("TaskQueue unsuccessful in adding task HexBytes('0xc20b1fd0d561fd36fc89a13e3c43454c7056ac697ffacd1443a0b2cec5a1a365') ", 'because qsize=6144, num_tasks=6114, maxsize=6144, open_slots=30, num queueing=1, len(_tasks)=6114, task_idx=0, queuing=(<trinity._utils.datastructures.PredefinedSortableTask object at 0x7fa37b28cd10>,), original msg: ')
Details of embedded exception 1:
Traceback (most recent call last):
File "/usr/src/app/trinity/trinity/_utils/datastructures.py", line 181, in add
self._open_queue.put_nowait(task)
File "/usr/local/lib/python3.7/asyncio/queues.py", line 144, in put_nowait
raise QueueFull
asyncio.queues.QueueFull
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/usr/local/lib/python3.7/site-packages/async_service/base.py", line 300, in _run_and_manage_task
await task.run()
File "/usr/local/lib/python3.7/site-packages/async_service/asyncio.py", line 35, in run
await self._async_fn(*self._async_fn_args)
File "/usr/src/app/trinity/trinity/sync/beam/chain.py", line 1002, in _hang_until_storage_served
await self._serve_storage(event)
File "/usr/src/app/trinity/trinity/sync/beam/chain.py", line 1034, in _serve_storage
event.urgent,
File "/usr/src/app/trinity/trinity/sync/beam/state.py", line 368, in download_storage
await self.ensure_nodes_present(need_nodes, urgent)
File "/usr/src/app/trinity/trinity/sync/beam/state.py", line 179, in ensure_nodes_present
BLOCK_IMPORT_MISSING_STATE_TIMEOUT,
File "/usr/src/app/trinity/trinity/sync/beam/state.py", line 221, in _wait_for_nodes
await queue.add(unrequested_nodes)
File "/usr/src/app/trinity/trinity/_utils/datastructures.py", line 187, in add
f'because qsize={qsize}, '
asyncio.queues.QueueFull: ("TaskQueue unsuccessful in adding task HexBytes('0xc20b1fd0d561fd36fc89a13e3c43454c7056ac697ffacd1443a0b2cec5a1a365') ", 'because qsize=6144, num_tasks=6114, maxsize=6144, open_slots=30, num queueing=1, len(_tasks)=6114, task_idx=0, queuing=(<trinity._utils.datastructures.PredefinedSortableTask object at 0x7fa4281be9d0>,), original msg: ')
Details of embedded exception 2:
Traceback (most recent call last):
File "/usr/src/app/trinity/trinity/_utils/datastructures.py", line 181, in add
self._open_queue.put_nowait(task)
File "/usr/local/lib/python3.7/asyncio/queues.py", line 144, in put_nowait
raise QueueFull
asyncio.queues.QueueFull
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/usr/local/lib/python3.7/site-packages/async_service/base.py", line 300, in _run_and_manage_task
await task.run()
File "/usr/local/lib/python3.7/site-packages/async_service/asyncio.py", line 35, in run
await self._async_fn(*self._async_fn_args)
File "/usr/src/app/trinity/trinity/sync/beam/chain.py", line 1002, in _hang_until_storage_served
await self._serve_storage(event)
File "/usr/src/app/trinity/trinity/sync/beam/chain.py", line 1034, in _serve_storage
event.urgent,
File "/usr/src/app/trinity/trinity/sync/beam/state.py", line 368, in download_storage
await self.ensure_nodes_present(need_nodes, urgent)
File "/usr/src/app/trinity/trinity/sync/beam/state.py", line 179, in ensure_nodes_present
BLOCK_IMPORT_MISSING_STATE_TIMEOUT,
File "/usr/src/app/trinity/trinity/sync/beam/state.py", line 221, in _wait_for_nodes
await queue.add(unrequested_nodes)
File "/usr/src/app/trinity/trinity/_utils/datastructures.py", line 187, in add
f'because qsize={qsize}, '
asyncio.queues.QueueFull: ("TaskQueue unsuccessful in adding task HexBytes('0xc20b1fd0d561fd36fc89a13e3c43454c7056ac697ffacd1443a0b2cec5a1a365') ", 'because qsize=6144, num_tasks=6114, maxsize=6144, open_slots=30, num queueing=1, len(_tasks)=6114, task_idx=0, queuing=(<trinity._utils.datastructures.PredefinedSortableTask object at 0x7fa37b28cd10>,), original msg: ')�[0m
�[1m�[31m ERROR 2020-09-03 21:51:33,798 asyncio Exception in callback <TaskWakeupMethWrapper object at 0x7fa35dc06d50>
handle: <Handle <TaskWakeupMethWrapper object at 0x7fa35dc06d50>>
Traceback (most recent call last):
File "uvloop/cbhandles.pyx", line 70, in uvloop.loop.Handle._run
File "/usr/local/lib/python3.7/site-packages/async_service/base.py", line 300, in _run_and_manage_task
await task.run()
File "/usr/local/lib/python3.7/site-packages/async_service/base.py", line 165, in run
await self.child_manager.run()
File "/usr/local/lib/python3.7/site-packages/async_service/asyncio.py", line 234, in run
await self._wait_all_tasks_done()
File "/usr/local/lib/python3.7/site-packages/async_service/asyncio.py", line 195, in _wait_all_tasks_done
await task
File "uvloop/cbhandles.pyx", line 70, in uvloop.loop.Handle._run
File "/usr/local/lib/python3.7/site-packages/async_service/asyncio.py", line 122, in _handle_cancelled
await self._real_handle_cancelled()
File "/usr/local/lib/python3.7/site-packages/async_service/asyncio.py", line 173, in _real_handle_cancelled
await asyncio_task
File "uvloop/cbhandles.pyx", line 70, in uvloop.loop.Handle._run
File "/usr/local/lib/python3.7/site-packages/async_service/base.py", line 300, in _run_and_manage_task
await task.run()
File "/usr/local/lib/python3.7/site-packages/async_service/base.py", line 165, in run
await self.child_manager.run()
File "/usr/local/lib/python3.7/site-packages/async_service/asyncio.py", line 246, in run
for _, exc_value, exc_tb in self._errors
trio.MultiError: QueueFull("TaskQueue unsuccessful in adding task HexBytes('0xc20b1fd0d561fd36fc89a13e3c43454c7056ac697ffacd1443a0b2cec5a1a365') ", 'because qsize=6144, num_tasks=6114, maxsize=6144, open_slots=30, num queueing=1, len(_tasks)=6114, task_idx=0, queuing=(<trinity._utils.datastructures.PredefinedSortableTask object at 0x7fa4281be9d0>,), original msg: '), QueueFull("TaskQueue unsuccessful in adding task HexBytes('0xc20b1fd0d561fd36fc89a13e3c43454c7056ac697ffacd1443a0b2cec5a1a365') ", 'because qsize=6144, num_tasks=6114, maxsize=6144, open_slots=30, num queueing=1, len(_tasks)=6114, task_idx=0, queuing=(<trinity._utils.datastructures.PredefinedSortableTask object at 0x7fa37b28cd10>,), original msg: ')
Details of embedded exception 1:
Traceback (most recent call last):
File "/usr/src/app/trinity/trinity/_utils/datastructures.py", line 181, in add
self._open_queue.put_nowait(task)
File "/usr/local/lib/python3.7/asyncio/queues.py", line 144, in put_nowait
raise QueueFull
asyncio.queues.QueueFull
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/usr/local/lib/python3.7/site-packages/async_service/base.py", line 300, in _run_and_manage_task
await task.run()
File "/usr/local/lib/python3.7/site-packages/async_service/asyncio.py", line 35, in run
await self._async_fn(*self._async_fn_args)
File "/usr/src/app/trinity/trinity/sync/beam/chain.py", line 1002, in _hang_until_storage_served
await self._serve_storage(event)
File "/usr/src/app/trinity/trinity/sync/beam/chain.py", line 1034, in _serve_storage
event.urgent,
File "/usr/src/app/trinity/trinity/sync/beam/state.py", line 368, in download_storage
await self.ensure_nodes_present(need_nodes, urgent)
File "/usr/src/app/trinity/trinity/sync/beam/state.py", line 179, in ensure_nodes_present
BLOCK_IMPORT_MISSING_STATE_TIMEOUT,
File "/usr/src/app/trinity/trinity/sync/beam/state.py", line 221, in _wait_for_nodes
await queue.add(unrequested_nodes)
File "/usr/src/app/trinity/trinity/_utils/datastructures.py", line 187, in add
f'because qsize={qsize}, '
asyncio.queues.QueueFull: ("TaskQueue unsuccessful in adding task HexBytes('0xc20b1fd0d561fd36fc89a13e3c43454c7056ac697ffacd1443a0b2cec5a1a365') ", 'because qsize=6144, num_tasks=6114, maxsize=6144, open_slots=30, num queueing=1, len(_tasks)=6114, task_idx=0, queuing=(<trinity._utils.datastructures.PredefinedSortableTask object at 0x7fa4281be9d0>,), original msg: ')
Details of embedded exception 2:
Traceback (most recent call last):
File "/usr/src/app/trinity/trinity/_utils/datastructures.py", line 181, in add
self._open_queue.put_nowait(task)
File "/usr/local/lib/python3.7/asyncio/queues.py", line 144, in put_nowait
raise QueueFull
asyncio.queues.QueueFull
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/usr/local/lib/python3.7/site-packages/async_service/base.py", line 300, in _run_and_manage_task
await task.run()
File "/usr/local/lib/python3.7/site-packages/async_service/asyncio.py", line 35, in run
await self._async_fn(*self._async_fn_args)
File "/usr/src/app/trinity/trinity/sync/beam/chain.py", line 1002, in _hang_until_storage_served
await self._serve_storage(event)
File "/usr/src/app/trinity/trinity/sync/beam/chain.py", line 1034, in _serve_storage
event.urgent,
File "/usr/src/app/trinity/trinity/sync/beam/state.py", line 368, in download_storage
await self.ensure_nodes_present(need_nodes, urgent)
File "/usr/src/app/trinity/trinity/sync/beam/state.py", line 179, in ensure_nodes_present
BLOCK_IMPORT_MISSING_STATE_TIMEOUT,
File "/usr/src/app/trinity/trinity/sync/beam/state.py", line 221, in _wait_for_nodes
await queue.add(unrequested_nodes)
File "/usr/src/app/trinity/trinity/_utils/datastructures.py", line 187, in add
f'because qsize={qsize}, '
asyncio.queues.QueueFull: ("TaskQueue unsuccessful in adding task HexBytes('0xc20b1fd0d561fd36fc89a13e3c43454c7056ac697ffacd1443a0b2cec5a1a365') ", 'because qsize=6144, num_tasks=6114, maxsize=6144, open_slots=30, num queueing=1, len(_tasks)=6114, task_idx=0, queuing=(<trinity._utils.datastructures.PredefinedSortableTask object at 0x7fa37b28cd10>,), original msg: ')�[0m
DEBUG 2020-09-03 21:51:33,798 MuirGlacierVM Beam pivot over 1 txn preview for <BlockHeader #10790870 55f0d7cd> b/c StateUnretrievable('No servers for CollectMissingAccount') after 0.0s, %exec 100, stats: BeamStat: accts=0, a_nodes=0, codes=0, strg=0, s_nodes=0, nodes=0, rtt=0.000s, wait=0s
How can it be fixed
Needs further investigation.
cburgdorf commented
It looks like this is the offending part which doesn't handle the QueueFull
exception.
trinity/trinity/sync/beam/state.py
Lines 238 to 240 in 0ab77d9
I'm not sure if catching the exception and returning 0
would be appropriate or if there is a better mechanism in place to signal that we need to slow down requesting? What would @carver do? 😅
pipermerriam commented
I got another of these today:
asyncio_run_in_process._utils.RemoteTraceback:
""" (exception from process: 23334)
Traceback (most recent call last):
File "/root/env/lib/python3.8/site-packages/trinity/_utils/datastructures.py", line 181, in add
self._open_queue.put_nowait(task)
File "/root/.pyenv/versions/3.8.5/lib/python3.8/asyncio/queues.py", line 148, in put_nowait
raise QueueFull
asyncio.queues.QueueFull
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/root/env/lib/python3.8/site-packages/asyncio_run_in_process/_child.py", line 168, in _run_on_asyncio
result: Any = loop.run_until_complete(_do_async_fn(async_fn, args, to_parent, loop))
File "uvloop/loop.pyx", line 1456, in uvloop.loop.Loop.run_until_complete
File "/root/env/lib/python3.8/site-packages/asyncio_run_in_process/_child.py", line 160, in _do_async_fn
return await async_fn_task
File "/root/env/lib/python3.8/site-packages/asyncio_run_in_process/_child.py", line 85, in _handle_coro
return await coro_task
File "/root/env/lib/python3.8/site-packages/trinity/extensibility/asyncio.py", line 79, in _do_run
await wait_first(
File "/root/env/lib/python3.8/site-packages/p2p/asyncio_utils.py", line 69, in wait_first
raise done_task.exception()
File "/root/env/lib/python3.8/site-packages/trinity/components/builtin/syncer/component.py", line 391, in do_run
await wait_first(tasks, max_wait_after_cancellation=2)
File "/root/env/lib/python3.8/site-packages/p2p/asyncio_utils.py", line 69, in wait_first
raise done_task.exception()
File "/root/env/lib/python3.8/site-packages/trinity/components/builtin/syncer/component.py", line 399, in launch_sync
await strategy.sync(
File "/root/env/lib/python3.8/site-packages/trinity/components/builtin/syncer/component.py", line 232, in sync
await manager.wait_finished()
File "/root/env/lib/python3.8/site-packages/async_generator/_util.py", line 42, in __aexit__
await self._agen.asend(None)
File "/root/env/lib/python3.8/site-packages/async_service/asyncio.py", line 441, in background_asyncio_service
raise MultiError(
File "/root/env/lib/python3.8/site-packages/async_service/base.py", line 300, in _run_and_manage_task
await task.run()
File "/root/env/lib/python3.8/site-packages/async_service/base.py", line 165, in run
await self.child_manager.run()
File "/root/env/lib/python3.8/site-packages/async_service/asyncio.py", line 243, in run
raise MultiError(
File "/root/env/lib/python3.8/site-packages/async_service/base.py", line 300, in _run_and_manage_task
await task.run()
File "/root/env/lib/python3.8/site-packages/async_service/base.py", line 165, in run
await self.child_manager.run()
File "/root/env/lib/python3.8/site-packages/async_service/asyncio.py", line 243, in run
raise MultiError(
File "/root/env/lib/python3.8/site-packages/async_service/base.py", line 300, in _run_and_manage_task
await task.run()
File "/root/env/lib/python3.8/site-packages/async_service/asyncio.py", line 35, in run
await self._async_fn(*self._async_fn_args)
File "/root/env/lib/python3.8/site-packages/trinity/sync/beam/chain.py", line 812, in _preview_address_load
new_account_nodes, collection_time = await self._load_address_state(
File "/root/env/lib/python3.8/site-packages/trinity/sync/beam/chain.py", line 832, in _load_address_state
num_accounts, new_account_nodes = await self._request_address_nodes(
File "/root/env/lib/python3.8/site-packages/trinity/sync/beam/chain.py", line 898, in _request_address_nodes
collected_nodes = await self._state_downloader.download_accounts(
File "/root/env/lib/python3.8/site-packages/trinity/sync/beam/state.py", line 316, in download_accounts
await self.ensure_nodes_present(need_nodes, block_number, urgent)
File "/root/env/lib/python3.8/site-packages/trinity/sync/beam/state.py", line 182, in ensure_nodes_present
num_nodes_found = await self._wait_for_nodes(
File "/root/env/lib/python3.8/site-packages/trinity/sync/beam/state.py", line 241, in _wait_for_nodes
await queue.add(unrequested_nodes)
File "/root/env/lib/python3.8/site-packages/trinity/_utils/datastructures.py", line 185, in add
raise asyncio.QueueFull(
asyncio.queues.QueueFull: ("TaskQueue unsuccessful in adding task HexBytes('0xa762147673de680ad163aa4e5accdb22c49abccabff3987b4bc930b9891a2524') ", 'because qsize=6144, num_tasks=6142, maxsize=6144, open_slots=2, num queueing=2, len(_tasks)=6142, task_idx=0, queuing=(<trinity._utils.datastructures.PredefinedSortableTask object at 0x7f8e15240ee0>, <trinity._utils.datastructures.PredefinedSortableTask object at 0x7f8def016d90>), original msg: ')
"""
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/root/env/lib/python3.8/site-packages/trinity/bootstrap.py", line 315, in run
loop.run_until_complete(_run(boot_info, get_base_db_fn, component_manager_manager))
File "uvloop/loop.pyx", line 1456, in uvloop.loop.Loop.run_until_complete
File "/root/env/lib/python3.8/site-packages/trinity/bootstrap.py", line 275, in _run
await component_manager.run()
File "/root/env/lib/python3.8/site-packages/async_service/asyncio.py", line 243, in run
raise MultiError(
File "/root/env/lib/python3.8/site-packages/async_service/base.py", line 300, in _run_and_manage_task
await task.run()
File "/root/env/lib/python3.8/site-packages/async_service/asyncio.py", line 35, in run
await self._async_fn(*self._async_fn_args)
File "/root/env/lib/python3.8/site-packages/trinity/extensibility/component_manager.py", line 105, in run
await wait_first(tasks, max_wait_after_cancellation=10)
File "/root/env/lib/python3.8/site-packages/p2p/asyncio_utils.py", line 69, in wait_first
raise done_task.exception()
File "/root/env/lib/python3.8/site-packages/trinity/extensibility/asyncio.py", line 21, in run_in_process
await run_in_process(self._do_run, subprocess_kwargs=self.get_subprocess_kwargs())
File "/root/env/lib/python3.8/site-packages/asyncio_run_in_process/main.py", line 424, in run_in_process
return proc.get_result_or_raise()
File "/root/env/lib/python3.8/site-packages/asyncio_run_in_process/process.py", line 219, in get_result_or_raise
raise self._error
asyncio.queues.QueueFull: ("TaskQueue unsuccessful in adding task HexBytes('0xa762147673de680ad163aa4e5accdb22c49abccabff3987b4bc930b9891a2524') ", 'because qsize=6144, num_tasks=6142, maxsize=6144, open_slots=2, num queueing=2, len(_tasks)=6142, task_idx=0, queuing=(<trinity._utils.datastructures.PredefinedSortableTask object at 0x7f8e15240ee0>, <trinity._utils.datastructures.PredefinedSortableTask object at 0x7f8def016d90>), original msg: ')