dask-contrib/dask-histogram

Cannot `import dask_histogram` which causes failures downstream (`hist.dask` etc) with the newest `dask 2024.9.0`

ikrommyd opened this issue · 9 comments

In [1]: import dask_histogram
---------------------------------------------------------------------------
ImportError                               Traceback (most recent call last)
Cell In[1], line 1
----> 1 import dask_histogram

File ~/miniforge3/envs/egamma_dev/lib/python3.10/site-packages/dask_histogram/__init__.py:1
----> 1 from dask_histogram import config  # isort:skip; load dask-histogram config
      3 import boost_histogram.axis as _axis
      4 import boost_histogram.storage as _storage

File ~/miniforge3/envs/egamma_dev/lib/python3.10/site-packages/dask_histogram/config.py:3
      1 import os
----> 3 import dask.config
      4 import yaml
      6 config = dask.config.config

File ~/miniforge3/envs/egamma_dev/lib/python3.10/site-packages/dask/__init__.py:5
      3 from dask import config, datasets
      4 from dask._version import get_versions
----> 5 from dask.base import (
      6     annotate,
      7     compute,
      8     get_annotations,
      9     is_dask_collection,
     10     optimize,
     11     persist,
     12     visualize,
     13 )
     14 from dask.core import istask
     15 from dask.delayed import delayed

File ~/miniforge3/envs/egamma_dev/lib/python3.10/site-packages/dask/base.py:1039
   1030     from dask import threaded
   1032     named_schedulers.update(
   1033         {
   1034             "threads": threaded.get,
   1035             "threading": threaded.get,
   1036         }
   1037     )
-> 1039     from dask import multiprocessing as dask_multiprocessing
   1041     named_schedulers.update(
   1042         {
   1043             "processes": dask_multiprocessing.get,
   1044             "multiprocessing": dask_multiprocessing.get,
   1045         }
   1046     )
   1049 get_err_msg = """
   1050 The get= keyword has been removed.
   1051
   (...)
   1065     x.compute(scheduler=client)
   1066 """.strip()

File ~/miniforge3/envs/egamma_dev/lib/python3.10/site-packages/dask/multiprocessing.py:19
     17 from dask import config
     18 from dask.local import MultiprocessingPoolExecutor, get_async, reraise
---> 19 from dask.optimization import cull, fuse
     20 from dask.system import CPU_COUNT
     21 from dask.typing import Key

File ~/miniforge3/envs/egamma_dev/lib/python3.10/site-packages/dask/optimization.py:10
      7 from typing import Any
      9 from dask import config, core, utils
---> 10 from dask._task_spec import GraphNode
     11 from dask.base import normalize_token, tokenize
     12 from dask.core import (
     13     flatten,
     14     get_dependencies,
   (...)
     19     toposort,
     20 )

File ~/miniforge3/envs/egamma_dev/lib/python3.10/site-packages/dask/_task_spec.py:90
     88 from dask.base import tokenize
     89 from dask.core import reverse_dict
---> 90 from dask.sizeof import sizeof
     91 from dask.typing import Key as KeyType
     92 from dask.utils import is_namedtuple_instance

File ~/miniforge3/envs/egamma_dev/lib/python3.10/site-packages/dask/sizeof.py:318
    312         except Exception:
    313             logger.exception(
    314                 f"Failed to register sizeof entry point {entry_point.name}"
    315             )
--> 318 _register_entry_point_plugins()

File ~/miniforge3/envs/egamma_dev/lib/python3.10/site-packages/dask/sizeof.py:309, in _register_entry_point_plugins()
    307 """Register sizeof implementations exposed by the entry_point mechanism."""
    308 for entry_point in importlib_metadata.entry_points(group="dask.sizeof"):
--> 309     registrar = entry_point.load()
    310     try:
    311         registrar(sizeof)

File ~/miniforge3/envs/egamma_dev/lib/python3.10/site-packages/importlib_metadata/__init__.py:189, in EntryPoint.load(self)
    184 """Load the entry point from its definition. If only a module
    185 is indicated by the value, return that module. Otherwise,
    186 return the named object.
    187 """
    188 match = cast(Match, self.pattern.match(self.value))
--> 189 module = import_module(match.group('module'))
    190 attrs = filter(None, (match.group('attr') or '').split('.'))
    191 return functools.reduce(getattr, attrs, module)

File ~/miniforge3/envs/egamma_dev/lib/python3.10/importlib/__init__.py:126, in import_module(name, package)
    124             break
    125         level += 1
--> 126 return _bootstrap._gcd_import(name[level:], package, level)

File ~/miniforge3/envs/egamma_dev/lib/python3.10/site-packages/dask_awkward/__init__.py:3
      1 from dask_awkward import config  # isort:skip; load awkward config
----> 3 import dask_awkward.lib.core as core
      4 import dask_awkward.lib.describe as describe
      5 import dask_awkward.lib.inspect as inspect

File ~/miniforge3/envs/egamma_dev/lib/python3.10/site-packages/dask_awkward/lib/__init__.py:1
----> 1 import dask_awkward.lib.str as str
      2 import dask_awkward.lib.utils as utils
      3 from dask_awkward.lib.core import Array, PartitionCompatibility, Record, Scalar

File ~/miniforge3/envs/egamma_dev/lib/python3.10/site-packages/dask_awkward/lib/str.py:10
      7 import awkward.operations.str as akstr
      8 from typing_extensions import ParamSpec
---> 10 from dask_awkward.lib.core import Array, map_partitions
     12 T = TypeVar("T")
     13 P = ParamSpec("P")

File ~/miniforge3/envs/egamma_dev/lib/python3.10/site-packages/dask_awkward/lib/core.py:38
     24 from awkward.typetracer import (
     25     MaybeNone,
     26     OneOf,
   (...)
     29     is_unknown_scalar,
     30 )
     31 from dask.base import (
     32     DaskMethodsMixin,
     33     dont_optimize,
   (...)
     36     unpack_collections,
     37 )
---> 38 from dask.blockwise import BlockwiseDep
     39 from dask.blockwise import blockwise as dask_blockwise
     40 from dask.context import globalmethod

File ~/miniforge3/envs/egamma_dev/lib/python3.10/site-packages/dask/blockwise.py:13
     10 import tlz as toolz
     12 import dask
---> 13 from dask.base import clone_key, get_name_from_key, tokenize
     14 from dask.core import flatten, ishashable, keys_in_tasks, reverse_dict
     15 from dask.highlevelgraph import HighLevelGraph, Layer

ImportError: cannot import name 'clone_key' from partially initialized module 'dask.base' (most likely due to a circular import) (/Users/iason/miniforge3/envs/egamma_dev/lib/python3.10/site-packages/dask/base.py)

well, that's a weird one. That error is inside dask itself.

Can you try:

from dask import blockwise

Does that fail?

> Can you try:
>
> from dask import blockwise
>
> Does that fail?

@lgray yeah that errors out with ImportError: cannot import name 'clone_key' from partially initialized module 'dask.base' (most likely due to a circular import)

$ docker run --rm -ti python:3.12 /bin/bash
root@330e3f07c6ad:/# python -m venv venv && . venv/bin/activate
(venv) root@330e3f07c6ad:/# python -m pip --quiet install --upgrade uv
(venv) root@330e3f07c6ad:/# uv pip install dask dask-histogram
Resolved 12 packages in 466ms
Prepared 12 packages in 469ms
Installed 12 packages in 28ms
 + boost-histogram==1.5.0
 + click==8.1.7
 + cloudpickle==3.0.0
 + dask==2024.9.0
 + dask-histogram==2024.3.0
 + fsspec==2024.9.0
 + locket==1.0.0
 + numpy==2.1.1
 + packaging==24.1
 + partd==1.4.2
 + pyyaml==6.0.2
 + toolz==0.12.1
(venv) root@330e3f07c6ad:/# python -c 'from dask import blockwise'
Traceback (most recent call last):
  File "<string>", line 1, in <module>
  File "/venv/lib/python3.12/site-packages/dask/__init__.py", line 5, in <module>
    from dask.base import (
  File "/venv/lib/python3.12/site-packages/dask/base.py", line 1039, in <module>
    from dask import multiprocessing as dask_multiprocessing
  File "/venv/lib/python3.12/site-packages/dask/multiprocessing.py", line 19, in <module>
    from dask.optimization import cull, fuse
  File "/venv/lib/python3.12/site-packages/dask/optimization.py", line 10, in <module>
    from dask._task_spec import GraphNode
  File "/venv/lib/python3.12/site-packages/dask/_task_spec.py", line 90, in <module>
    from dask.sizeof import sizeof
  File "/venv/lib/python3.12/site-packages/dask/sizeof.py", line 318, in <module>
    _register_entry_point_plugins()
  File "/venv/lib/python3.12/site-packages/dask/sizeof.py", line 309, in _register_entry_point_plugins
    registrar = entry_point.load()
                ^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/importlib/metadata/__init__.py", line 205, in load
    module = import_module(match.group('module'))
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/importlib/__init__.py", line 90, in import_module
    return _bootstrap._gcd_import(name[level:], package, level)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/venv/lib/python3.12/site-packages/dask_histogram/__init__.py", line 6, in <module>
    from dask_histogram.core import (
  File "/venv/lib/python3.12/site-packages/dask_histogram/core.py", line 13, in <module>
    from dask.blockwise import BlockwiseDep, blockwise, fuse_roots, optimize_blockwise
  File "/venv/lib/python3.12/site-packages/dask/blockwise.py", line 13, in <module>
    from dask.base import clone_key, get_name_from_key, tokenize
ImportError: cannot import name 'clone_key' from partially initialized module 'dask.base' (most likely due to a circular import) (/venv/lib/python3.12/site-packages/dask/base.py)
(venv) root@330e3f07c6ad:/#
Just the stack trace:
Traceback (most recent call last):
  File "<string>", line 1, in <module>
  File "/venv/lib/python3.12/site-packages/dask/__init__.py", line 5, in <module>
    from dask.base import (
  File "/venv/lib/python3.12/site-packages/dask/base.py", line 1039, in <module>
    from dask import multiprocessing as dask_multiprocessing
  File "/venv/lib/python3.12/site-packages/dask/multiprocessing.py", line 19, in <module>
    from dask.optimization import cull, fuse
  File "/venv/lib/python3.12/site-packages/dask/optimization.py", line 10, in <module>
    from dask._task_spec import GraphNode
  File "/venv/lib/python3.12/site-packages/dask/_task_spec.py", line 90, in <module>
    from dask.sizeof import sizeof
  File "/venv/lib/python3.12/site-packages/dask/sizeof.py", line 318, in <module>
    _register_entry_point_plugins()
  File "/venv/lib/python3.12/site-packages/dask/sizeof.py", line 309, in _register_entry_point_plugins
    registrar = entry_point.load()
                ^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/importlib/metadata/__init__.py", line 205, in load
    module = import_module(match.group('module'))
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/importlib/__init__.py", line 90, in import_module
    return _bootstrap._gcd_import(name[level:], package, level)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/venv/lib/python3.12/site-packages/dask_histogram/__init__.py", line 6, in <module>
    from dask_histogram.core import (
  File "/venv/lib/python3.12/site-packages/dask_histogram/core.py", line 13, in <module>
    from dask.blockwise import BlockwiseDep, blockwise, fuse_roots, optimize_blockwise
  File "/venv/lib/python3.12/site-packages/dask/blockwise.py", line 13, in <module>
    from dask.base import clone_key, get_name_from_key, tokenize
ImportError: cannot import name 'clone_key' from partially initialized module 'dask.base' (most likely due to a circular import) (/venv/lib/python3.12/site-packages/dask/base.py)

It seems that there is an interference effect, though, given that an install of just dask works:

$ docker run --rm -ti python:3.12 /bin/bash
root@1218450f6a25:/# python -m venv venv && . venv/bin/activate
(venv) root@1218450f6a25:/# python -m pip --quiet install --upgrade uv
(venv) root@1218450f6a25:/# uv pip install dask
Resolved 9 packages in 421ms
Prepared 9 packages in 209ms
Installed 9 packages in 7ms
 + click==8.1.7
 + cloudpickle==3.0.0
 + dask==2024.9.0
 + fsspec==2024.9.0
 + locket==1.0.0
 + packaging==24.1
 + partd==1.4.2
 + pyyaml==6.0.2
 + toolz==0.12.1
(venv) root@1218450f6a25:/# python -c 'from dask.base import clone_key'
(venv) root@1218450f6a25:/# echo $?
0
(venv) root@1218450f6a25:/# uv pip install dask-histogram
Resolved 12 packages in 380ms
Prepared 3 packages in 540ms
Installed 3 packages in 10ms
 + boost-histogram==1.5.0
 + dask-histogram==2024.3.0
 + numpy==2.1.1
(venv) root@1218450f6a25:/# python -c 'from dask.base import clone_key'
Traceback (most recent call last):
  File "<string>", line 1, in <module>
  File "/venv/lib/python3.12/site-packages/dask/__init__.py", line 5, in <module>
    from dask.base import (
  File "/venv/lib/python3.12/site-packages/dask/base.py", line 1039, in <module>
    from dask import multiprocessing as dask_multiprocessing
  File "/venv/lib/python3.12/site-packages/dask/multiprocessing.py", line 19, in <module>
    from dask.optimization import cull, fuse
  File "/venv/lib/python3.12/site-packages/dask/optimization.py", line 10, in <module>
    from dask._task_spec import GraphNode
  File "/venv/lib/python3.12/site-packages/dask/_task_spec.py", line 90, in <module>
    from dask.sizeof import sizeof
  File "/venv/lib/python3.12/site-packages/dask/sizeof.py", line 318, in <module>
    _register_entry_point_plugins()
  File "/venv/lib/python3.12/site-packages/dask/sizeof.py", line 309, in _register_entry_point_plugins
    registrar = entry_point.load()
                ^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/importlib/metadata/__init__.py", line 205, in load
    module = import_module(match.group('module'))
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/importlib/__init__.py", line 90, in import_module
    return _bootstrap._gcd_import(name[level:], package, level)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/venv/lib/python3.12/site-packages/dask_histogram/__init__.py", line 6, in <module>
    from dask_histogram.core import (
  File "/venv/lib/python3.12/site-packages/dask_histogram/core.py", line 13, in <module>
    from dask.blockwise import BlockwiseDep, blockwise, fuse_roots, optimize_blockwise
  File "/venv/lib/python3.12/site-packages/dask/blockwise.py", line 13, in <module>
    from dask.base import clone_key, get_name_from_key, tokenize
ImportError: cannot import name 'clone_key' from partially initialized module 'dask.base' (most likely due to a circular import) (/venv/lib/python3.12/site-packages/dask/base.py)
(venv) root@1218450f6a25:/# 

oh that's very weird... ok

I do see this in my environment with dask on main.

However, if I make a fresh environment, something like

$ conda create -n bit python==3.10
$ conda activate bit
$ pip install ".[complete]"  # (in dask repo dir) 
$ python
>>> import dask

is OK. If the same pip command is run in the dask-histogram repo, it fails.

If you install dask first, it imports. If you then install dask-histogram, it no longer does. The culprit is in pyproject.toml:

[project.entry-points."dask.sizeof"]
dask_histogram = "dask_histogram.sizeof:register"

which causes dask-histogram to be imported while dask itself is still being imported; dask-histogram then imports dask again, hitting the partially initialized module. I am not sure why this changed, but it should be possible to import that one module without importing the rest of dask-histogram anyway.

Thanks for investigating !
This is blocking us in bumping the dask ecosystem in nixpkgs.

Sorry, I was wrong, it was the following PR, perhaps these lines

Indeed, the following makes dask importable again. I'll post on their repo.

--- a/dask/optimization.py
+++ b/dask/optimization.py
@@ -7,8 +7,8 @@ from enum import Enum
 from typing import Any

 from dask import config, core, utils
-from dask._task_spec import GraphNode
+# from dask._task_spec import GraphNode
 from dask.base import normalize_token, tokenize
 from dask.core import (
     flatten,
     get_dependencies,

(GraphNode is used in the module, but only in places where it could be imported later)