Cannot `import dask_histogram` which causes failures downstream (`hist.dask` etc) with the newest `dask 2024.9.0`
ikrommyd opened this issue · 9 comments
In [1]: import dask_histogram
---------------------------------------------------------------------------
ImportError Traceback (most recent call last)
Cell In[1], line 1
----> 1 import dask_histogram
File ~/miniforge3/envs/egamma_dev/lib/python3.10/site-packages/dask_histogram/__init__.py:1
----> 1 from dask_histogram import config # isort:skip; load dask-histogram config
3 import boost_histogram.axis as _axis
4 import boost_histogram.storage as _storage
File ~/miniforge3/envs/egamma_dev/lib/python3.10/site-packages/dask_histogram/config.py:3
1 import os
----> 3 import dask.config
4 import yaml
6 config = dask.config.config
File ~/miniforge3/envs/egamma_dev/lib/python3.10/site-packages/dask/__init__.py:5
3 from dask import config, datasets
4 from dask._version import get_versions
----> 5 from dask.base import (
6 annotate,
7 compute,
8 get_annotations,
9 is_dask_collection,
10 optimize,
11 persist,
12 visualize,
13 )
14 from dask.core import istask
15 from dask.delayed import delayed
File ~/miniforge3/envs/egamma_dev/lib/python3.10/site-packages/dask/base.py:1039
1030 from dask import threaded
1032 named_schedulers.update(
1033 {
1034 "threads": threaded.get,
1035 "threading": threaded.get,
1036 }
1037 )
-> 1039 from dask import multiprocessing as dask_multiprocessing
1041 named_schedulers.update(
1042 {
1043 "processes": dask_multiprocessing.get,
1044 "multiprocessing": dask_multiprocessing.get,
1045 }
1046 )
1049 get_err_msg = """
1050 The get= keyword has been removed.
1051
(...)
1065 x.compute(scheduler=client)
1066 """.strip()
File ~/miniforge3/envs/egamma_dev/lib/python3.10/site-packages/dask/multiprocessing.py:19
17 from dask import config
18 from dask.local import MultiprocessingPoolExecutor, get_async, reraise
---> 19 from dask.optimization import cull, fuse
20 from dask.system import CPU_COUNT
21 from dask.typing import Key
File ~/miniforge3/envs/egamma_dev/lib/python3.10/site-packages/dask/optimization.py:10
7 from typing import Any
9 from dask import config, core, utils
---> 10 from dask._task_spec import GraphNode
11 from dask.base import normalize_token, tokenize
12 from dask.core import (
13 flatten,
14 get_dependencies,
(...)
19 toposort,
20 )
File ~/miniforge3/envs/egamma_dev/lib/python3.10/site-packages/dask/_task_spec.py:90
88 from dask.base import tokenize
89 from dask.core import reverse_dict
---> 90 from dask.sizeof import sizeof
91 from dask.typing import Key as KeyType
92 from dask.utils import is_namedtuple_instance
File ~/miniforge3/envs/egamma_dev/lib/python3.10/site-packages/dask/sizeof.py:318
312 except Exception:
313 logger.exception(
314 f"Failed to register sizeof entry point {entry_point.name}"
315 )
--> 318 _register_entry_point_plugins()
File ~/miniforge3/envs/egamma_dev/lib/python3.10/site-packages/dask/sizeof.py:309, in _register_entry_point_plugins()
307 """Register sizeof implementations exposed by the entry_point mechanism."""
308 for entry_point in importlib_metadata.entry_points(group="dask.sizeof"):
--> 309 registrar = entry_point.load()
310 try:
311 registrar(sizeof)
File ~/miniforge3/envs/egamma_dev/lib/python3.10/site-packages/importlib_metadata/__init__.py:189, in EntryPoint.load(self)
184 """Load the entry point from its definition. If only a module
185 is indicated by the value, return that module. Otherwise,
186 return the named object.
187 """
188 match = cast(Match, self.pattern.match(self.value))
--> 189 module = import_module(match.group('module'))
190 attrs = filter(None, (match.group('attr') or '').split('.'))
191 return functools.reduce(getattr, attrs, module)
File ~/miniforge3/envs/egamma_dev/lib/python3.10/importlib/__init__.py:126, in import_module(name, package)
124 break
125 level += 1
--> 126 return _bootstrap._gcd_import(name[level:], package, level)
File ~/miniforge3/envs/egamma_dev/lib/python3.10/site-packages/dask_awkward/__init__.py:3
1 from dask_awkward import config # isort:skip; load awkward config
----> 3 import dask_awkward.lib.core as core
4 import dask_awkward.lib.describe as describe
5 import dask_awkward.lib.inspect as inspect
File ~/miniforge3/envs/egamma_dev/lib/python3.10/site-packages/dask_awkward/lib/__init__.py:1
----> 1 import dask_awkward.lib.str as str
2 import dask_awkward.lib.utils as utils
3 from dask_awkward.lib.core import Array, PartitionCompatibility, Record, Scalar
File ~/miniforge3/envs/egamma_dev/lib/python3.10/site-packages/dask_awkward/lib/str.py:10
7 import awkward.operations.str as akstr
8 from typing_extensions import ParamSpec
---> 10 from dask_awkward.lib.core import Array, map_partitions
12 T = TypeVar("T")
13 P = ParamSpec("P")
File ~/miniforge3/envs/egamma_dev/lib/python3.10/site-packages/dask_awkward/lib/core.py:38
24 from awkward.typetracer import (
25 MaybeNone,
26 OneOf,
(...)
29 is_unknown_scalar,
30 )
31 from dask.base import (
32 DaskMethodsMixin,
33 dont_optimize,
(...)
36 unpack_collections,
37 )
---> 38 from dask.blockwise import BlockwiseDep
39 from dask.blockwise import blockwise as dask_blockwise
40 from dask.context import globalmethod
File ~/miniforge3/envs/egamma_dev/lib/python3.10/site-packages/dask/blockwise.py:13
10 import tlz as toolz
12 import dask
---> 13 from dask.base import clone_key, get_name_from_key, tokenize
14 from dask.core import flatten, ishashable, keys_in_tasks, reverse_dict
15 from dask.highlevelgraph import HighLevelGraph, Layer
ImportError: cannot import name 'clone_key' from partially initialized module 'dask.base' (most likely due to a circular import) (/Users/iason/miniforge3/envs/egamma_dev/lib/python3.10/site-packages/dask/base.py)
well, that's a weird one. That error is inside dask itself.
Can you try:
from dask import blockwise
Does that fail?
> Can you try:
> from dask import blockwise
> Does that fail?
@lgray yeah that errors out with ImportError: cannot import name 'clone_key' from partially initialized module 'dask.base' (most likely due to a circular import)
$ docker run --rm -ti python:3.12 /bin/bash
root@330e3f07c6ad:/# python -m venv venv && . venv/bin/activate
(venv) root@330e3f07c6ad:/# python -m pip --quiet install --upgrade uv
(venv) root@330e3f07c6ad:/# uv pip install dask dask-histogram
Resolved 12 packages in 466ms
Prepared 12 packages in 469ms
Installed 12 packages in 28ms
+ boost-histogram==1.5.0
+ click==8.1.7
+ cloudpickle==3.0.0
+ dask==2024.9.0
+ dask-histogram==2024.3.0
+ fsspec==2024.9.0
+ locket==1.0.0
+ numpy==2.1.1
+ packaging==24.1
+ partd==1.4.2
+ pyyaml==6.0.2
+ toolz==0.12.1
(venv) root@330e3f07c6ad:/# python -c 'from dask import blockwise'
Traceback (most recent call last):
File "<string>", line 1, in <module>
File "/venv/lib/python3.12/site-packages/dask/__init__.py", line 5, in <module>
from dask.base import (
File "/venv/lib/python3.12/site-packages/dask/base.py", line 1039, in <module>
from dask import multiprocessing as dask_multiprocessing
File "/venv/lib/python3.12/site-packages/dask/multiprocessing.py", line 19, in <module>
from dask.optimization import cull, fuse
File "/venv/lib/python3.12/site-packages/dask/optimization.py", line 10, in <module>
from dask._task_spec import GraphNode
File "/venv/lib/python3.12/site-packages/dask/_task_spec.py", line 90, in <module>
from dask.sizeof import sizeof
File "/venv/lib/python3.12/site-packages/dask/sizeof.py", line 318, in <module>
_register_entry_point_plugins()
File "/venv/lib/python3.12/site-packages/dask/sizeof.py", line 309, in _register_entry_point_plugins
registrar = entry_point.load()
^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/importlib/metadata/__init__.py", line 205, in load
module = import_module(match.group('module'))
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/importlib/__init__.py", line 90, in import_module
return _bootstrap._gcd_import(name[level:], package, level)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/venv/lib/python3.12/site-packages/dask_histogram/__init__.py", line 6, in <module>
from dask_histogram.core import (
File "/venv/lib/python3.12/site-packages/dask_histogram/core.py", line 13, in <module>
from dask.blockwise import BlockwiseDep, blockwise, fuse_roots, optimize_blockwise
File "/venv/lib/python3.12/site-packages/dask/blockwise.py", line 13, in <module>
from dask.base import clone_key, get_name_from_key, tokenize
ImportError: cannot import name 'clone_key' from partially initialized module 'dask.base' (most likely due to a circular import) (/venv/lib/python3.12/site-packages/dask/base.py)
(venv) root@330e3f07c6ad:/#
Just the stack trace:
Traceback (most recent call last):
File "<string>", line 1, in <module>
File "/venv/lib/python3.12/site-packages/dask/__init__.py", line 5, in <module>
from dask.base import (
File "/venv/lib/python3.12/site-packages/dask/base.py", line 1039, in <module>
from dask import multiprocessing as dask_multiprocessing
File "/venv/lib/python3.12/site-packages/dask/multiprocessing.py", line 19, in <module>
from dask.optimization import cull, fuse
File "/venv/lib/python3.12/site-packages/dask/optimization.py", line 10, in <module>
from dask._task_spec import GraphNode
File "/venv/lib/python3.12/site-packages/dask/_task_spec.py", line 90, in <module>
from dask.sizeof import sizeof
File "/venv/lib/python3.12/site-packages/dask/sizeof.py", line 318, in <module>
_register_entry_point_plugins()
File "/venv/lib/python3.12/site-packages/dask/sizeof.py", line 309, in _register_entry_point_plugins
registrar = entry_point.load()
^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/importlib/metadata/__init__.py", line 205, in load
module = import_module(match.group('module'))
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/importlib/__init__.py", line 90, in import_module
return _bootstrap._gcd_import(name[level:], package, level)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/venv/lib/python3.12/site-packages/dask_histogram/__init__.py", line 6, in <module>
from dask_histogram.core import (
File "/venv/lib/python3.12/site-packages/dask_histogram/core.py", line 13, in <module>
from dask.blockwise import BlockwiseDep, blockwise, fuse_roots, optimize_blockwise
File "/venv/lib/python3.12/site-packages/dask/blockwise.py", line 13, in <module>
from dask.base import clone_key, get_name_from_key, tokenize
ImportError: cannot import name 'clone_key' from partially initialized module 'dask.base' (most likely due to a circular import) (/venv/lib/python3.12/site-packages/dask/base.py)
There seems to be an interference effect, though, given that an install of just dask
works:
$ docker run --rm -ti python:3.12 /bin/bash
root@1218450f6a25:/# python -m venv venv && . venv/bin/activate
(venv) root@1218450f6a25:/# python -m pip --quiet install --upgrade uv
(venv) root@1218450f6a25:/# uv pip install dask
Resolved 9 packages in 421ms
Prepared 9 packages in 209ms
Installed 9 packages in 7ms
+ click==8.1.7
+ cloudpickle==3.0.0
+ dask==2024.9.0
+ fsspec==2024.9.0
+ locket==1.0.0
+ packaging==24.1
+ partd==1.4.2
+ pyyaml==6.0.2
+ toolz==0.12.1
(venv) root@1218450f6a25:/# python -c 'from dask.base import clone_key'
(venv) root@1218450f6a25:/# echo $?
0
(venv) root@1218450f6a25:/# uv pip install dask-histogram
Resolved 12 packages in 380ms
Prepared 3 packages in 540ms
Installed 3 packages in 10ms
+ boost-histogram==1.5.0
+ dask-histogram==2024.3.0
+ numpy==2.1.1
(venv) root@1218450f6a25:/# python -c 'from dask.base import clone_key'
Traceback (most recent call last):
File "<string>", line 1, in <module>
File "/venv/lib/python3.12/site-packages/dask/__init__.py", line 5, in <module>
from dask.base import (
File "/venv/lib/python3.12/site-packages/dask/base.py", line 1039, in <module>
from dask import multiprocessing as dask_multiprocessing
File "/venv/lib/python3.12/site-packages/dask/multiprocessing.py", line 19, in <module>
from dask.optimization import cull, fuse
File "/venv/lib/python3.12/site-packages/dask/optimization.py", line 10, in <module>
from dask._task_spec import GraphNode
File "/venv/lib/python3.12/site-packages/dask/_task_spec.py", line 90, in <module>
from dask.sizeof import sizeof
File "/venv/lib/python3.12/site-packages/dask/sizeof.py", line 318, in <module>
_register_entry_point_plugins()
File "/venv/lib/python3.12/site-packages/dask/sizeof.py", line 309, in _register_entry_point_plugins
registrar = entry_point.load()
^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/importlib/metadata/__init__.py", line 205, in load
module = import_module(match.group('module'))
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/importlib/__init__.py", line 90, in import_module
return _bootstrap._gcd_import(name[level:], package, level)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/venv/lib/python3.12/site-packages/dask_histogram/__init__.py", line 6, in <module>
from dask_histogram.core import (
File "/venv/lib/python3.12/site-packages/dask_histogram/core.py", line 13, in <module>
from dask.blockwise import BlockwiseDep, blockwise, fuse_roots, optimize_blockwise
File "/venv/lib/python3.12/site-packages/dask/blockwise.py", line 13, in <module>
from dask.base import clone_key, get_name_from_key, tokenize
ImportError: cannot import name 'clone_key' from partially initialized module 'dask.base' (most likely due to a circular import) (/venv/lib/python3.12/site-packages/dask/base.py)
(venv) root@1218450f6a25:/#
oh that's very weird... ok
I do see this in my environment with dask on main.
However, if I make a fresh environment, something like
$ conda create -n bit python==3.10
$ conda activate bit
$ pip install ".[complete]" # (in dask repo dir)
$ python
>>> import dask
is OK. If the same pip command is run in the dask-histogram repo, it fails.
If you install dask first, it imports. If you then install dask-histogram, it no longer does. The culprit is in pyproject.toml:
[project.entry-points."dask.sizeof"]
dask_histogram = "dask_histogram.sizeof:register"
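which makes dask load `dask_histogram.sizeof` (and with it the whole `dask_histogram` package) while dask itself is still being imported; dask-histogram in turn imports `dask.blockwise`, which needs names from the only partially initialized `dask.base`. Why this changed I am not sure, but it should be possible to import the one module without importing the rest of dask-histogram anyway.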
I believe it's this one: https://github.com/dask/dask/pull/11373/files
Thanks for investigating !
This is blocking us in bumping the dask ecosystem in nixpkgs.
Sorry, I was wrong, it was the following PR, perhaps these lines
Indeed, the following makes dask importable again. I'll post on their repo.
--- a/dask/optimization.py
+++ b/dask/optimization.py
@@ -7,8 +7,8 @@ from enum import Enum
from typing import Any
from dask import config, core, utils
-from dask._task_spec import GraphNode
+# from dask._task_spec import GraphNode
from dask.base import normalize_token, tokenize
from dask.core import (
flatten,
get_dependencies,
(GraphNode is used in the module, but only in places where it could be imported later)