Couldn't find a dataset script at /home/dell7960/PycharmProjects/DARE/MergeLM/glue/glue.py or any data file in the same directory.
Synnai commented
The default value of `cache_dir` in `utils/load_config.py` (`/mnt/data/yule/.cache`, a machine-specific path) causes the following error:
```
python train_plms_glue.py --language_model_name roberta-base --dataset_name cola --learning_rate 1e-5 --num_runs 5
INFO:root:********** Run starts. **********
INFO:root:configuration is Namespace(dataset_name='cola', auxiliary_dataset_name='cola', language_model_name='roberta-base', multitask_training=False, batch_size=16, num_epochs=10, learning_rate=1e-05, gpu=0, num_runs=5, device='cuda:0', save_model_dir='./save_models/cola/roberta-base_lr1e-05')
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): huggingface.co:443
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "HEAD /roberta-base/resolve/main/tokenizer_config.json HTTP/1.1" 200 0
Traceback (most recent call last):
  File "/home/dell7960/PycharmProjects/DARE/MergeLM/train_plms_glue.py", line 84, in <module>
    tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=os.path.join(cache_dir, args.language_model_name))
  File "/home/dell7960/PycharmProjects/DARE/.venv/lib/python3.10/site-packages/transformers/models/auto/tokenization_auto.py", line 686, in from_pretrained
    tokenizer_config = get_tokenizer_config(pretrained_model_name_or_path, **kwargs)
  File "/home/dell7960/PycharmProjects/DARE/.venv/lib/python3.10/site-packages/transformers/models/auto/tokenization_auto.py", line 519, in get_tokenizer_config
    resolved_config_file = cached_file(
  File "/home/dell7960/PycharmProjects/DARE/.venv/lib/python3.10/site-packages/transformers/utils/hub.py", line 429, in cached_file
    resolved_file = hf_hub_download(
  File "/home/dell7960/PycharmProjects/DARE/.venv/lib/python3.10/site-packages/huggingface_hub/utils/_validators.py", line 111, in _inner_fn
    validate_repo_id(arg_value)
  File "/home/dell7960/PycharmProjects/DARE/.venv/lib/python3.10/site-packages/huggingface_hub/utils/_validators.py", line 159, in validate_repo_id
    raise HFValidationError(
huggingface_hub.utils._validators.HFValidationError: Repo id must be in the form 'repo_name' or 'namespace/repo_name': '/mnt/data/yule/.cache/roberta-base'. Use `repo_type` argument if needed.

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/home/dell7960/PycharmProjects/DARE/MergeLM/train_plms_glue.py", line 86, in <module>
    tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=args.language_model_name, cache_dir=cache_dir)
  File "/home/dell7960/PycharmProjects/DARE/.venv/lib/python3.10/site-packages/transformers/models/auto/tokenization_auto.py", line 686, in from_pretrained
    tokenizer_config = get_tokenizer_config(pretrained_model_name_or_path, **kwargs)
  File "/home/dell7960/PycharmProjects/DARE/.venv/lib/python3.10/site-packages/transformers/models/auto/tokenization_auto.py", line 519, in get_tokenizer_config
    resolved_config_file = cached_file(
  File "/home/dell7960/PycharmProjects/DARE/.venv/lib/python3.10/site-packages/transformers/utils/hub.py", line 429, in cached_file
    resolved_file = hf_hub_download(
  File "/home/dell7960/PycharmProjects/DARE/.venv/lib/python3.10/site-packages/huggingface_hub/utils/_validators.py", line 119, in _inner_fn
    return fn(*args, **kwargs)
  File "/home/dell7960/PycharmProjects/DARE/.venv/lib/python3.10/site-packages/huggingface_hub/file_download.py", line 1418, in hf_hub_download
    os.makedirs(os.path.dirname(blob_path), exist_ok=True)
  File "/usr/lib/python3.10/os.py", line 215, in makedirs
    makedirs(head, exist_ok=exist_ok)
  File "/usr/lib/python3.10/os.py", line 215, in makedirs
    makedirs(head, exist_ok=exist_ok)
  File "/usr/lib/python3.10/os.py", line 215, in makedirs
    makedirs(head, exist_ok=exist_ok)
  [Previous line repeated 1 more time]
  File "/usr/lib/python3.10/os.py", line 225, in makedirs
    mkdir(name, mode)
PermissionError: [Errno 13] Permission denied: '/mnt/data'
```
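If I read the traceback right, `train_plms_glue.py` first tries to load the tokenizer from a local snapshot under `cache_dir` and, when that fails, falls back to downloading from the Hub into the same `cache_dir`. A minimal sketch of that pattern (reconstructed from the traceback; the actual source may differ), showing why both branches fail with the unwritable default:

```python
# Sketch of the tokenizer-loading fallback around lines 84-86 of
# train_plms_glue.py, reconstructed from the traceback above.
import os
from transformers import AutoTokenizer

cache_dir = "/mnt/data/yule/.cache"  # default from utils/load_config.py
model_name = "roberta-base"

try:
    # Branch 1: treat <cache_dir>/<model_name> as a local snapshot.
    # No snapshot exists there, so transformers falls through to the Hub
    # and raises HFValidationError: an absolute path is not a valid repo id.
    tokenizer = AutoTokenizer.from_pretrained(
        pretrained_model_name_or_path=os.path.join(cache_dir, model_name)
    )
except Exception:
    # Branch 2: download from the Hub into cache_dir. hf_hub_download then
    # calls os.makedirs on the cache path and hits PermissionError,
    # because /mnt/data is not writable for this user.
    tokenizer = AutoTokenizer.from_pretrained(
        pretrained_model_name_or_path=model_name, cache_dir=cache_dir
    )
```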
So I set `cache_dir` in `utils/load_config.py` to the project path `/home/dell7960/PycharmProjects/DARE/MergeLM`; however, another error occurs, as follows:
```
python train_plms_glue.py --language_model_name roberta-base --dataset_name cola --learning_rate 1e-5 --num_runs 5
INFO:root:********** Run starts. **********
INFO:root:configuration is Namespace(dataset_name='cola', auxiliary_dataset_name='cola', language_model_name='roberta-base', multitask_training=False, batch_size=16, num_epochs=10, learning_rate=1e-05, gpu=0, num_runs=5, device='cuda:0', save_model_dir='./save_models/cola/roberta-base_lr1e-05')
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): huggingface.co:443
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "HEAD /roberta-base/resolve/main/tokenizer_config.json HTTP/1.1" 200 0
Traceback (most recent call last):
  File "/home/dell7960/PycharmProjects/DARE/MergeLM/train_plms_glue.py", line 154, in <module>
    train_dataset, val_dataset, test_dataset, num_labels = glue_data_loader.load_dataset(dataset_name=args.dataset_name,
  File "/home/dell7960/PycharmProjects/DARE/MergeLM/utils/glue_data_loader.py", line 76, in load_dataset
    dataset = load_dataset(path=os.path.join(cache_dir, "glue"), name=dataset_name)
  File "/home/dell7960/PycharmProjects/DARE/.venv/lib/python3.10/site-packages/datasets/load.py", line 1785, in load_dataset
    builder_instance = load_dataset_builder(
  File "/home/dell7960/PycharmProjects/DARE/.venv/lib/python3.10/site-packages/datasets/load.py", line 1514, in load_dataset_builder
    dataset_module = dataset_module_factory(
  File "/home/dell7960/PycharmProjects/DARE/.venv/lib/python3.10/site-packages/datasets/load.py", line 1233, in dataset_module_factory
    raise FileNotFoundError(
FileNotFoundError: Couldn't find a dataset script at /home/dell7960/PycharmProjects/DARE/MergeLM/glue/glue.py or any data file in the same directory.
```
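It looks like `utils/glue_data_loader.py` (line 76 in the traceback) resolves GLUE as a local dataset script at `<cache_dir>/glue/glue.py`, which the project directory does not contain. A rough sketch of the failing call, plus a workaround I would guess at (assuming Hub access; I am not sure it matches the intended setup):

```python
# Reconstruction of the failing call in utils/glue_data_loader.py:
# it expects a local dataset script at <cache_dir>/glue/glue.py.
import os
from datasets import load_dataset

cache_dir = "/home/dell7960/PycharmProjects/DARE/MergeLM"
dataset_name = "cola"

# Fails with FileNotFoundError: there is no glue/glue.py under cache_dir.
# dataset = load_dataset(path=os.path.join(cache_dir, "glue"), name=dataset_name)

# Guessed workaround (an assumption, not the project's intended setup):
# load GLUE from the Hub and cache it under a writable directory instead.
dataset = load_dataset("glue", name=dataset_name, cache_dir=cache_dir)
```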
Could you please share any ideas on how to fix this?