ModuleNotFoundError: No module named 'unstructured.ingest.pipeline.reformat'
Closed this issue · 4 comments
Describe the bug
A clear and concise description of what the bug is.
Running this code in a jupyter notebook:
from unstructured.ingest.interfaces import PartitionConfig, ReadConfig
from unstructured.ingest.runner.sharepoint import sharepoint
if __name__ == "__main__":
output = sharepoint(
verbose=True,
read_config=ReadConfig(),
partition_config=PartitionConfig(
output_dir="sharepoint-ingest-output",
num_processes=2,
),
client_id=sharepoint_client_id,
client_cred=sharepoint_client_secret,
site="https://domain.sharepoint.com",
# Flag to process only files within the site(s)
files_only=True,
path="Shared Documents",
recursive=False,
)
display(output)
is erroring with
---------------------------------------------------------------------------
ModuleNotFoundError Traceback (most recent call last)
Cell In[11], line 2
1 from unstructured.ingest.interfaces import PartitionConfig, ReadConfig
----> 2 from unstructured.ingest.runner.sharepoint import sharepoint
4 if __name__ == "__main__":
5 output = sharepoint(
6 verbose=True,
7 read_config=ReadConfig(),
(...)
18 recursive=False,
19 )
File ~/.pyenv/versions/3.8.16/lib/python3.8/site-packages/unstructured/ingest/runner/__init__.py:3
1 import typing as t
----> 3 from .airtable import AirtableRunner
4 from .azure import AzureRunner
5 from .biomed import BiomedRunner
File ~/.pyenv/versions/3.8.16/lib/python3.8/site-packages/unstructured/ingest/runner/airtable.py:6
3 import typing as t
5 from unstructured.ingest.logger import ingest_log_streaming_init, logger
----> 6 from unstructured.ingest.runner.base_runner import Runner
7 from unstructured.ingest.runner.utils import update_download_dir_hash
10 class AirtableRunner(Runner):
File ~/.pyenv/versions/3.8.16/lib/python3.8/site-packages/unstructured/ingest/runner/base_runner.py:14
3 from dataclasses import dataclass
5 from unstructured.ingest.interfaces import (
6 BaseDestinationConnector,
7 BaseSourceConnector,
(...)
12 ReadConfig,
13 )
---> 14 from unstructured.ingest.processor import process_documents
15 from unstructured.ingest.runner.writers import writer_map
18 @dataclass
19 class Runner(ABC):
File ~/.pyenv/versions/3.8.16/lib/python3.8/site-packages/unstructured/ingest/processor.py:13
3 from contextlib import suppress
5 from unstructured.ingest.interfaces import (
6 BaseDestinationConnector,
7 BaseSourceConnector,
(...)
11 ProcessorConfig,
12 )
---> 13 from unstructured.ingest.pipeline import (
14 Chunker,
15 DocFactory,
16 Embedder,
17 Partitioner,
18 Pipeline,
19 PipelineContext,
20 Reader,
21 ReformatNode,
22 Writer,
23 )
25 with suppress(RuntimeError):
26 mp.set_start_method("spawn")
File ~/.pyenv/versions/3.8.16/lib/python3.8/site-packages/unstructured/ingest/pipeline/__init__.py:5
3 from .partition import Partitioner
4 from .pipeline import Pipeline
----> 5 from .reformat.chunking import Chunker
6 from .reformat.embedding import Embedder
7 from .source import Reader
ModuleNotFoundError: No module named 'unstructured.ingest.pipeline.reformat'
To Reproduce
The code above should do it.
Rerunning pip install I see the latest version of unstructured
Requirement already satisfied: unstructured[all-docs] in /Users/david.lozzi/.pyenv/versions/3.8.16/lib/python3.8/site-packages (0.10.21)
- Filetype:
- Any additional API parameters:
Environment:
- Using the hosted API or self hosting? running locally in jupyter
- How are you calling the API? (Langchain, SDKs, cUrl, etc.) SDK as seen above
Additional context
Add any other context about the problem here.
@awalker4 new error after upgrading all of the unstructured libraries:
---------------------------------------------------------------------------
ImportError Traceback (most recent call last)
Cell In[16], line 2
1 from unstructured.ingest.interfaces import PartitionConfig, ReadConfig
----> 2 from unstructured.ingest.runner.sharepoint import sharepoint
4 if __name__ == "__main__":
5 output = sharepoint(
6 verbose=True,
7 read_config=ReadConfig(),
(...)
18 recursive=False,
19 )
File ~/.pyenv/versions/3.8.16/lib/python3.8/site-packages/unstructured/ingest/runner/__init__.py:3
1 import typing as t
----> 3 from .airtable import AirtableRunner
4 from .azure import AzureRunner
5 from .biomed import BiomedRunner
File ~/.pyenv/versions/3.8.16/lib/python3.8/site-packages/unstructured/ingest/runner/airtable.py:6
3 import typing as t
5 from unstructured.ingest.logger import ingest_log_streaming_init, logger
----> 6 from unstructured.ingest.runner.base_runner import Runner
7 from unstructured.ingest.runner.utils import update_download_dir_hash
10 class AirtableRunner(Runner):
File ~/.pyenv/versions/3.8.16/lib/python3.8/site-packages/unstructured/ingest/runner/base_runner.py:5
2 from abc import ABC, abstractmethod
3 from dataclasses import dataclass
----> 5 from unstructured.ingest.interfaces import (
6 BaseDestinationConnector,
7 BaseSourceConnector,
8 ChunkingConfig,
9 EmbeddingConfig,
10 PartitionConfig,
11 PermissionsConfig,
12 ProcessorConfig,
13 ReadConfig,
14 )
15 from unstructured.ingest.processor import process_documents
16 from unstructured.ingest.runner.writers import writer_map
ImportError: cannot import name 'PermissionsConfig' from 'unstructured.ingest.interfaces' (/Users/david.lozzi/.pyenv/versions/3.8.16/lib/python3.8/site-packages/unstructured/ingest/interfaces.py)
Hey @DavidLozzi! Jumping in to see if I can help.
0.10.22 should include a refactor that changed the runners to classes (yes, we need to update the docs on our end; there's a PR in progress). In the meantime, you should be able to run with:
from unstructured.ingest.interfaces import PartitionConfig, ReadConfig
from unstructured.ingest.runner.sharepoint import SharePointRunner
Caveat: the fact that PermissionsConfig exists in your error suggests that you are up to date, but frankly I'm not sure why that's the error you're getting. When I try the same thing on 0.10.22 (tried at latest and at commit #ad1b93d to be sure) I get the error that I would expect, specifically:
ImportError Traceback (most recent call last)
/Users/ryannikolaidis/Development/unstructured/unstructured/scratch/scratch.ipynb Cell 7 line 2
1 from unstructured.ingest.interfaces import PartitionConfig, ReadConfig
----> 2 from unstructured.ingest.runner.sharepoint import sharepoint
ImportError: cannot import name 'sharepoint' from 'unstructured.ingest.runner.sharepoint' (/Users/ryannikolaidis/Development/unstructured/unstructured/unstructured/ingest/runner/sharepoint.py)
Happy to dig deeper. Or if you want to give that a whirl and let me know what you hit, we can go from there? Thanks for your patience!
Hi @DavidLozzi, I'm going to close this for now. Please let us know if you're still seeing this issue!