Cannot use wandb logging
jagilley opened this issue · 3 comments
jagilley commented
I'm getting the following error when I try to enable wandb logging:
root@05dcba51026c:/workspace/mistral-finetune# torchrun --nproc-per-node 2 --master_port $RANDOM -m train config.yaml
[2024-05-29 00:06:22,437] torch.distributed.run: [WARNING]
[2024-05-29 00:06:22,437] torch.distributed.run: [WARNING] *****************************************
[2024-05-29 00:06:22,437] torch.distributed.run: [WARNING] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed.
[2024-05-29 00:06:22,437] torch.distributed.run: [WARNING] *****************************************
Traceback (most recent call last):
File "/usr/local/lib/python3.10/dist-packages/simple_parsing/helpers/serialization/serializable.py", line 893, in from_dict
instance = cls(**init_args) # type: ignore
TypeError: TrainArgs.__init__() got an unexpected keyword argument 'wandb.project'
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/usr/lib/python3.10/runpy.py", line 196, in _run_module_as_main
return _run_code(code, main_globals, None,
File "/usr/lib/python3.10/runpy.py", line 86, in _run_code
exec(code, run_globals)
File "/workspace/mistral-finetune/train.py", line 323, in <module>
fire.Fire(train)
File "/usr/local/lib/python3.10/dist-packages/fire/core.py", line 143, in Fire
component_trace = _Fire(component, args, parsed_flag_args, context, name)
File "/usr/local/lib/python3.10/dist-packages/fire/core.py", line 477, in _Fire
component, remaining_args = _CallAndUpdateTrace(
File "/usr/local/lib/python3.10/dist-packages/fire/core.py", line 693, in _CallAndUpdateTrace
component = fn(*varargs, **kwargs)
File "/workspace/mistral-finetune/train.py", line 60, in train
args: TrainArgs = TrainArgs.load(config, drop_extra_fields=False)
File "/usr/local/lib/python3.10/dist-packages/simple_parsing/helpers/serialization/serializable.py", line 306, in load
return load(cls, path=path, drop_extra_fields=drop_extra_fields, load_fn=load_fn, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/simple_parsing/helpers/serialization/serializable.py", line 539, in load
return from_dict(cls, d, drop_extra_fields=drop_extra_fields)
File "/usr/local/lib/python3.10/dist-packages/simple_parsing/helpers/serialization/serializable.py", line 896, in from_dict
raise RuntimeError(
RuntimeError: Couldn't instantiate class <class 'finetune.args.TrainArgs'> using init args dict_keys(['data', 'model_id_or_path', 'run_dir', 'optim', 'seed', 'seq_len', 'batch_size', 'max_steps', 'log_freq', 'ckpt_freq', 'save_adapters', 'eval_freq', 'no_eval', 'lora', 'wandb.project', 'wandb.key']): TrainArgs.__init__() got an unexpected keyword argument 'wandb.project'
Traceback (most recent call last):
File "/usr/local/lib/python3.10/dist-packages/simple_parsing/helpers/serialization/serializable.py", line 893, in from_dict
instance = cls(**init_args) # type: ignore
TypeError: TrainArgs.__init__() got an unexpected keyword argument 'wandb.project'
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/usr/lib/python3.10/runpy.py", line 196, in _run_module_as_main
return _run_code(code, main_globals, None,
File "/usr/lib/python3.10/runpy.py", line 86, in _run_code
exec(code, run_globals)
File "/workspace/mistral-finetune/train.py", line 323, in <module>
fire.Fire(train)
File "/usr/local/lib/python3.10/dist-packages/fire/core.py", line 143, in Fire
component_trace = _Fire(component, args, parsed_flag_args, context, name)
File "/usr/local/lib/python3.10/dist-packages/fire/core.py", line 477, in _Fire
component, remaining_args = _CallAndUpdateTrace(
File "/usr/local/lib/python3.10/dist-packages/fire/core.py", line 693, in _CallAndUpdateTrace
component = fn(*varargs, **kwargs)
File "/workspace/mistral-finetune/train.py", line 60, in train
args: TrainArgs = TrainArgs.load(config, drop_extra_fields=False)
File "/usr/local/lib/python3.10/dist-packages/simple_parsing/helpers/serialization/serializable.py", line 306, in load
return load(cls, path=path, drop_extra_fields=drop_extra_fields, load_fn=load_fn, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/simple_parsing/helpers/serialization/serializable.py", line 539, in load
return from_dict(cls, d, drop_extra_fields=drop_extra_fields)
File "/usr/local/lib/python3.10/dist-packages/simple_parsing/helpers/serialization/serializable.py", line 896, in from_dict
raise RuntimeError(
RuntimeError: Couldn't instantiate class <class 'finetune.args.TrainArgs'> using init args dict_keys(['data', 'model_id_or_path', 'run_dir', 'optim', 'seed', 'seq_len', 'batch_size', 'max_steps', 'log_freq', 'ckpt_freq', 'save_adapters', 'eval_freq', 'no_eval', 'lora', 'wandb.project', 'wandb.key']): TrainArgs.__init__() got an unexpected keyword argument 'wandb.project'
[2024-05-29 00:06:27,450] torch.distributed.elastic.multiprocessing.api: [ERROR] failed (exitcode: 1) local_rank: 0 (pid: 4370) of binary: /usr/bin/python
Traceback (most recent call last):
File "/usr/local/bin/torchrun", line 8, in <module>
sys.exit(main())
File "/usr/local/lib/python3.10/dist-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", line 347, in wrapper
return f(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/torch/distributed/run.py", line 812, in main
run(args)
File "/usr/local/lib/python3.10/dist-packages/torch/distributed/run.py", line 803, in run
elastic_launch(
File "/usr/local/lib/python3.10/dist-packages/torch/distributed/launcher/api.py", line 135, in __call__
return launch_agent(self._config, self._entrypoint, list(args))
File "/usr/local/lib/python3.10/dist-packages/torch/distributed/launcher/api.py", line 268, in launch_agent
raise ChildFailedError(
torch.distributed.elastic.multiprocessing.errors.ChildFailedError:
============================================================
train FAILED
donwany commented
Set this in the YAML file:
max_steps: 300
run_dir: "/Users/johndoe/ultra_chat_test"
wandb.project: ultra_chat
Make sure you have wandb installed: pip install wandb
jagilley commented
I did all that
SaiKrishnaBala commented
Can you check if you are using the below format?
wandb:
  project: "mistral_tuning" # your wandb project name
  run_name: "" # your wandb run name
  key: "XXXXXXXXXXXXXX" # your wandb api key
  offline: False