explosion/spaCy

[BUG] -- Arguments `enable` and `disable` not working as expected in `spacy.load`

it176131 opened this issue · 0 comments

How to reproduce the behaviour

The problem

This raises error E1042.

import spacy

spacy.load("en_core_web_sm", enable=["senter"])
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[4], line 2
      1 # Error E1042.
----> 2 nlp = spacy.load("en_core_web_sm", enable=["senter"])

File ~\ADO\ml_kg\env\Lib\site-packages\spacy\__init__.py:51, in load(name, vocab, disable, enable, exclude, config)
     27 def load(
     28     name: Union[str, Path],
     29     *,
   (...)
     34     config: Union[Dict[str, Any], Config] = util.SimpleFrozenDict(),
     35 ) -> Language:
     36     """Load a spaCy model from an installed package or a local path.
     37 
     38     name (str): Package name or model path.
   (...)
     49     RETURNS (Language): The loaded nlp object.
     50     """
---> 51     return util.load_model(
     52         name,
     53         vocab=vocab,
     54         disable=disable,
     55         enable=enable,
     56         exclude=exclude,
     57         config=config,
     58     )

File ~\ADO\ml_kg\env\Lib\site-packages\spacy\util.py:465, in load_model(name, vocab, disable, enable, exclude, config)
    463     return get_lang_class(name.replace("blank:", ""))()
    464 if is_package(name):  # installed as package
--> 465     return load_model_from_package(name, **kwargs)  # type: ignore[arg-type]
    466 if Path(name).exists():  # path to model data directory
    467     return load_model_from_path(Path(name), **kwargs)  # type: ignore[arg-type]

File ~\ADO\ml_kg\env\Lib\site-packages\spacy\util.py:501, in load_model_from_package(name, vocab, disable, enable, exclude, config)
    484 """Load a model from an installed package.
    485 
    486 name (str): The package name.
   (...)
    498 RETURNS (Language): The loaded nlp object.
    499 """
    500 cls = importlib.import_module(name)
--> 501 return cls.load(vocab=vocab, disable=disable, enable=enable, exclude=exclude, config=config)

File ~\ADO\ml_kg\env\Lib\site-packages\en_core_web_sm\__init__.py:10, in load(**overrides)
      9 def load(**overrides):
---> 10     return load_model_from_init_py(__file__, **overrides)

File ~\ADO\ml_kg\env\Lib\site-packages\spacy\util.py:682, in load_model_from_init_py(init_file, vocab, disable, enable, exclude, config)
    680 if not model_path.exists():
    681     raise IOError(Errors.E052.format(path=data_path))
--> 682 return load_model_from_path(
    683     data_path,
    684     vocab=vocab,
    685     meta=meta,
    686     disable=disable,
    687     enable=enable,
    688     exclude=exclude,
    689     config=config,
    690 )

File ~\ADO\ml_kg\env\Lib\site-packages\spacy\util.py:539, in load_model_from_path(model_path, meta, vocab, disable, enable, exclude, config)
    537 overrides = dict_to_dot(config, for_overrides=True)
    538 config = load_config(config_path, overrides=overrides)
--> 539 nlp = load_model_from_config(
    540     config,
    541     vocab=vocab,
    542     disable=disable,
    543     enable=enable,
    544     exclude=exclude,
    545     meta=meta,
    546 )
    547 return nlp.from_disk(model_path, exclude=exclude, overrides=overrides)

File ~\ADO\ml_kg\env\Lib\site-packages\spacy\util.py:587, in load_model_from_config(config, meta, vocab, disable, enable, exclude, auto_fill, validate)
    584 # This will automatically handle all codes registered via the languages
    585 # registry, including custom subclasses provided via entry points
    586 lang_cls = get_lang_class(nlp_config["lang"])
--> 587 nlp = lang_cls.from_config(
    588     config,
    589     vocab=vocab,
    590     disable=disable,
    591     enable=enable,
    592     exclude=exclude,
    593     auto_fill=auto_fill,
    594     validate=validate,
    595     meta=meta,
    596 )
    597 return nlp

File ~\ADO\ml_kg\env\Lib\site-packages\spacy\language.py:1973, in Language.from_config(cls, config, vocab, disable, enable, exclude, meta, auto_fill, validate)
   1965         warnings.warn(
   1966             Warnings.W123.format(
   1967                 enable=enable,
   1968                 enabled=enabled,
   1969             )
   1970         )
   1972 # Ensure sets of disabled/enabled pipe names are not contradictory.
-> 1973 disabled_pipes = cls._resolve_component_status(
   1974     list({*disable, *config["nlp"].get("disabled", [])}),
   1975     enable,
   1976     config["nlp"]["pipeline"],
   1977 )
   1978 nlp._disabled = set(p for p in disabled_pipes if p not in exclude)
   1980 nlp.batch_size = config["nlp"]["batch_size"]

File ~\ADO\ml_kg\env\Lib\site-packages\spacy\language.py:2153, in Language._resolve_component_status(disable, enable, pipe_names)
   2151     # If any pipe to be enabled is in to_disable, the specification is inconsistent.
   2152     if len(set(enable) & to_disable):
-> 2153         raise ValueError(Errors.E1042.format(enable=enable, disable=disable))
   2155 return tuple(to_disable)

ValueError: [E1042] `enable=['senter']` and `disable=['senter']` are inconsistent with each other.
If you only passed one of `enable` or `disable`, the other argument is specified in your pipeline's configuration.
In that case pass an empty list for the previously not specified argument to avoid this error.

Based on the error message, I tried setting the `disable` argument to an empty list, but this raised the same error:

# Another error E1042.
nlp = spacy.load("en_core_web_sm", enable=["senter"], disable=[])
ValueError: [E1042] `enable=['senter']` and `disable=['senter']` are inconsistent with each other.
If you only passed one of `enable` or `disable`, the other argument is specified in your pipeline's configuration.
In that case pass an empty list for the previously not specified argument to avoid this error.

This led me to believe that something is wrong with the `disable` argument.

# Runs, but does not return as expected.
nlp = spacy.load("en_core_web_sm", disable=[])
nlp.disabled  # ['senter']

Hacky solutions

I've found at least two ways to get around it.

  1. Bypass the enable/disable arguments and supply a dictionary to the config argument:
# Runs and returns as expected.
nlp = spacy.load("en_core_web_sm", config={"nlp": {"disabled": []}})
nlp.disabled  # []
"senter" in nlp.pipe_names  # True
  2. Or load the model as-is and enable the component after-the-fact:
# Runs and returns as expected.
nlp = spacy.load("en_core_web_sm")
nlp.enable_pipe("senter")
nlp.disabled  # []
"senter" in nlp.pipe_names  # True

Thoughts?

I don't really like either of the hacky solutions as I'd expect the enable/disable arguments to handle this. I'd be willing to submit a PR if this is indeed a bug.

Your Environment

  • Operating System: Windows 10
  • Python Version Used: 3.12.4
  • spaCy Version Used: 3.7.5
  • Environment Information: Results of pip list
Package                    Version        Editable project location
-------------------------- -------------- ---------------------------
aiofiles                   24.1.0
annotated-types            0.7.0
anyio                      4.4.0
argon2-cffi                23.1.0
argon2-cffi-bindings       21.2.0
arrow                      1.3.0
asttokens                  2.4.1
async-lru                  2.0.4
attrs                      23.2.0
Babel                      2.15.0
beautifulsoup4             4.12.3
black                      24.4.2
bleach                     6.1.0
blis                       0.7.11
cachetools                 5.4.0
catalogue                  2.0.10
certifi                    2024.7.4
cffi                       1.16.0
cfgv                       3.4.0
charset-normalizer         3.3.2
click                      8.1.7
cloudpathlib               0.18.1
colorama                   0.4.6
comm                       0.2.2
confection                 0.1.5
contourpy                  1.2.1
curated-tokenizers         0.0.9
curated-transformers       0.1.1
cycler                     0.12.1
cymem                      2.0.8
debugpy                    1.8.2
decorator                  5.1.1
defusedxml                 0.7.1
distlib                    0.3.8
en-core-web-lg             3.7.1
en-core-web-md             3.7.1
en-core-web-sm             3.7.1
en-core-web-trf            3.7.3
executing                  2.0.1
fastapi                    0.110.3
fastjsonschema             2.20.0
filelock                   3.15.4
fonttools                  4.53.1
fqdn                       1.5.1
fsspec                     2024.6.1
h11                        0.14.0
httpcore                   1.0.5
httpx                      0.27.0
huggingface-hub            0.24.5
identify                   2.6.0
idna                       3.7
ipykernel                  6.29.5
ipython                    8.26.0
ipython-genutils           0.2.0
ipywidgets                 8.1.3
isoduration                20.11.0
isort                      5.13.2
jedi                       0.19.1
Jinja2                     3.1.4
json5                      0.9.25
jsonpointer                3.0.0
jsonschema                 4.23.0
jsonschema-specifications  2023.12.1
jupyter                    1.0.0
jupyter_client             8.6.2
jupyter-console            6.6.3
jupyter_core               5.7.2
jupyter-events             0.10.0
jupyter-lsp                2.2.5
jupyter_server             2.14.2
jupyter_server_terminals   0.5.3
jupyterlab                 4.2.4
jupyterlab_pygments        0.3.0
jupyterlab_server          2.27.3
jupyterlab_widgets         3.0.11
kiwisolver                 1.4.5
langcodes                  3.4.0
language_data              1.2.0
marisa-trie                1.2.0
markdown-it-py             3.0.0
MarkupSafe                 2.1.5
matplotlib                 3.9.1
matplotlib-inline          0.1.7
mdurl                      0.1.2
mistune                    3.0.2
mpmath                     1.3.0
murmurhash                 1.0.10
mypy                       1.10.1
mypy-extensions            1.0.0
nbclassic                  1.1.0
nbclient                   0.10.0
nbconvert                  7.16.4
nbformat                   5.10.4
neo4j                      5.22.0
nest-asyncio               1.6.0
networkx                   3.3
nodeenv                    1.9.1
notebook                   7.2.1
notebook_shim              0.2.4
numpy                      1.26.4
overrides                  7.7.0
packaging                  24.1
pandocfilters              1.5.1
parso                      0.8.4
pathspec                   0.12.1
peewee                     3.16.3
pillow                     10.4.0
pip                        24.2
pip-system-certs           4.0
platformdirs               4.2.2
pre-commit                 3.7.1
preshed                    3.0.9
prodigy                    1.15.6
prodigy_pdf                0.2.2
prometheus_client          0.20.0
prompt_toolkit             3.0.47
psutil                     6.0.0
pure_eval                  0.2.3
pycparser                  2.22
pydantic                   2.8.2
pydantic_core              2.20.1
Pygments                   2.18.0
PyJWT                      2.8.0
pyparsing                  3.1.2
pypdfium2                  4.20.0
pytesseract                0.3.10
python-dateutil            2.9.0.post0
python-dotenv              1.0.1
python-json-logger         2.0.7
pytz                       2024.1
pywin32                    306
pywinpty                   2.0.13
PyYAML                     6.0.1
pyzmq                      26.0.3
qtconsole                  5.5.2
QtPy                       2.4.1
radicli                    0.0.25
referencing                0.35.1
regex                      2024.7.24
requests                   2.32.3
rfc3339-validator          0.1.4
rfc3986-validator          0.1.1
rich                       13.7.1
rpds-py                    0.19.0
ruff                       0.5.2
safetensors                0.4.3
Send2Trash                 1.8.3
setuptools                 70.3.0
shellingham                1.5.4
six                        1.16.0
smart-open                 7.0.4
sniffio                    1.3.1
soupsieve                  2.5
spacy                      3.7.5
spacy-alignments           0.9.1
spacy-curated-transformers 0.2.2
spacy-legacy               3.0.12
spacy-llm                  0.7.2
spacy-loggers              1.0.5
spacy-lookups-data         1.0.5
spacy-transformers         1.3.5
srsly                      2.4.8
stack-data                 0.6.3
starlette                  0.37.2
sympy                      1.13.1
terminado                  0.18.1
thinc                      8.2.5
tinycss2                   1.3.0
tokenizers                 0.15.2
toolz                      0.12.1
torch                      2.4.0
tornado                    6.4.1
tqdm                       4.66.4
traitlets                  5.14.3
transformers               4.36.2
typeguard                  3.0.2
typer                      0.12.3
types-python-dateutil      2.9.0.20240316
typing_extensions          4.12.2
uri-template               1.3.0
urllib3                    2.2.2
uvicorn                    0.26.0
virtualenv                 20.26.3
wasabi                     1.1.3
wcwidth                    0.2.13
weasel                     0.4.1
webcolors                  24.6.0
webencodings               0.5.1
websocket-client           1.8.0
widgetsnbextension         4.0.11
wrapt                      1.16.0