Issue when installing unitxt on MacOS
Closed this issue · 1 comments
pawelknes commented
I encountered an issue when using unitxt after pulling the latest changes related to images etc. The issue seems to concern the numpy and pytorch packages. For example, when running test_api.py, the following error occurs (see the traceback at the bottom). Also, I was not able to install some packages from the requirements due to an issue with numpy.
To solve the issue I had to use Python 3.9 (I also tested versions 3.8, 3.10 and 3.11) and downgrade numpy to a specific version (I used 1.24 but didn't test other releases).
I know the same issue occurred for other people as well. We tested it on Mac Intel and Mac M2 systems.
../../src/unitxt/api.py:114: in load_dataset
return recipe().to_dataset(features=UNITXT_DATASET_SCHEMA)
../../src/unitxt/stream.py:231: in to_dataset
{
../../src/unitxt/stream.py:232: in <dictcomp>
key: Dataset.from_generator(
../../../../opt/anaconda3/envs/fm-eval/lib/python3.11/site-packages/datasets/arrow_dataset.py:1117: in from_generator
return GeneratorDatasetInputStream(
../../../../opt/anaconda3/envs/fm-eval/lib/python3.11/site-packages/datasets/io/generator.py:28: in __init__
self.builder = Generator(
../../../../opt/anaconda3/envs/fm-eval/lib/python3.11/site-packages/datasets/builder.py:374: in __init__
self.config, self.config_id = self._create_builder_config(
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <datasets.packaged_modules.generator.generator.Generator object at 0x127adc190>
config_name = None
custom_features = {'source': Value(dtype='string', id=None), 'target': Value(dtype='string', id=None), 'references': Sequence(feature=Va...'string', id=None), 'data_classification_policy': Sequence(feature=Value(dtype='string', id=None), length=-1, id=None)}
config_kwargs = {'features': {'source': Value(dtype='string', id=None), 'target': Value(dtype='string', id=None), 'references': Sequen...me': 'test'}, caching=False, copying=False), 'stream_name': 'test'}, caching=False, copying=False)}>, 'split': 'train'}
builder_config = None
def _create_builder_config(
self, config_name=None, custom_features=None, **config_kwargs
) -> Tuple[BuilderConfig, str]:
"""Create and validate BuilderConfig object as well as a unique config id for this config.
Raises ValueError if there are multiple builder configs and config_name and DEFAULT_CONFIG_NAME are None.
config_kwargs override the defaults kwargs in config
"""
builder_config = None
# try default config
if config_name is None and self.BUILDER_CONFIGS:
if self.DEFAULT_CONFIG_NAME is not None:
builder_config = self.builder_configs.get(self.DEFAULT_CONFIG_NAME)
logger.info(f"No config specified, defaulting to: {self.dataset_name}/{builder_config.name}")
else:
if len(self.BUILDER_CONFIGS) > 1:
if not config_kwargs:
example_of_usage = f"load_dataset('{self.dataset_name}', '{self.BUILDER_CONFIGS[0].name}')"
raise ValueError(
"Config name is missing."
f"\nPlease pick one among the available configs: {list(self.builder_configs.keys())}"
+ f"\nExample of usage:\n\t`{example_of_usage}`"
)
else:
builder_config = self.BUILDER_CONFIGS[0]
logger.info(
f"No config specified, defaulting to the single config: {self.dataset_name}/{builder_config.name}"
)
# try to get config by name
if isinstance(config_name, str):
builder_config = self.builder_configs.get(config_name)
if builder_config is None and self.BUILDER_CONFIGS:
raise ValueError(
f"BuilderConfig '{config_name}' not found. Available: {list(self.builder_configs.keys())}"
)
# if not using an existing config, then create a new config on the fly
if not builder_config:
if config_name is not None:
config_kwargs["name"] = config_name
elif self.DEFAULT_CONFIG_NAME and not config_kwargs:
# Use DEFAULT_CONFIG_NAME only if no config_kwargs are passed
config_kwargs["name"] = self.DEFAULT_CONFIG_NAME
if "version" not in config_kwargs and hasattr(self, "VERSION") and self.VERSION:
config_kwargs["version"] = self.VERSION
> builder_config = self.BUILDER_CONFIG_CLASS(**config_kwargs)
E TypeError: GeneratorConfig.__init__() got an unexpected keyword argument 'split'
../../../../opt/anaconda3/envs/fm-eval/lib/python3.11/site-packages/datasets/builder.py:612: TypeError
elronbandel commented
I see this is due to the datasets version: only the newer version supports the split argument. I will fix it.