NVIDIA/flowtron

List index out of range

Ben10lightningx opened this issue · 0 comments

When I enter the command `python train.py -c config.json -p train_config.output_directory=outdir data_config.use_attn_prior=1`, as instructed in the README, I get this output:

train_config.output_directory:outdir was not parsed
output_directory=outdir
output_directory:outdir was not parsed
data_config.use_attn_prior=1
use_attn_prior=1
{'train_config': {'output_directory': 'outdir', 'epochs': 1000, 'optim_algo': 'RAdam', 'learning_rate': 0.001, 'weight_decay': 1e-06, 'grad_clip_val': 1, 'sigma': 1.0, 'iters_per_checkpoint': 1000, 'batch_size': 6, 'seed': 1234, 'check
point_path': '', 'ignore_layers': [], 'finetune_layers': [], 'include_layers': ['speaker', 'encoder', 'embedding'], 'warmstart_checkpoint_path': '', 'with_tensorboard': True, 'fp16_run': True, 'gate_loss': True, 'use_ctc_loss': True, '
ctc_loss_weight': 0.01, 'blank_logprob': -8, 'ctc_loss_start_iter': 10000}, 'data_config': {'training_files': 'C:\Scripts\Voice\flowtron\filelists\list.txt', 'validation_files': 'C:\Scripts\Voice\flowtron\filelists
\list.txt', 'text_cleaners': ['flowtron_cleaners'], 'p_arpabet': 0.5, 'cmudict_path': 'data/cmudict_dictionary', 'sampling_rate': 22050, 'filter_length': 1024, 'hop_length': 256, 'win_length': 1024, 'mel_fmin': 0.0, 'mel_fmax': 8000.0
, 'max_wav_value': 32768.0, 'use_attn_prior': 1, 'attn_prior_threshold': 0.0, 'prior_cache_path': '/attention_prior_cache', 'betab_scaling_factor': 1.0, 'keep_ambiguous': False}, 'dist_config': {'dist_backend': 'nccl', 'dist_url': 'tcp
://localhost:54321'}, 'model_config': {'n_speakers': 1, 'n_speaker_dim': 128, 'n_text': 185, 'n_text_dim': 512, 'n_flows': 2, 'n_mel_channels': 80, 'n_attn_channels': 640, 'n_hidden': 1024, 'n_lstm_layers': 2, 'mel_encoder_n_hidden': 5
12, 'n_components': 0, 'mean_scale': 0.0, 'fixed_gaussian': True, 'dummy_speaker_embedding': False, 'use_gate_layer': True, 'use_cumm_attention': False}}

got rank 0 and world size 1 ...
Initializing RAdam optimizer
Flowtron(
(speaker_embedding): Embedding(1, 128)
(embedding): Embedding(185, 512)
(flows): ModuleList(
(0): AR_Step(
(conv): Conv1d(1024, 160, kernel_size=(1,), stride=(1,))
(lstm): LSTM(1664, 1024, num_layers=2)
(attention_lstm): LSTM(80, 1024)
(attention_layer): Attention(
(softmax): Softmax(dim=2)
(query): LinearNorm(
(linear_layer): Linear(in_features=1024, out_features=640, bias=False)
)
(key): LinearNorm(
(linear_layer): Linear(in_features=640, out_features=640, bias=False)
)
(value): LinearNorm(
(linear_layer): Linear(in_features=640, out_features=640, bias=False)
)
(v): LinearNorm(
(linear_layer): Linear(in_features=640, out_features=1, bias=False)
)
)
(dense_layer): DenseLayer(
(layers): ModuleList(
(0): LinearNorm(
(linear_layer): Linear(in_features=1024, out_features=1024, bias=True)
)
(1): LinearNorm(
(linear_layer): Linear(in_features=1024, out_features=1024, bias=True)
)
)
)
)
(1): AR_Back_Step(
(ar_step): AR_Step(
(conv): Conv1d(1024, 160, kernel_size=(1,), stride=(1,))
(lstm): LSTM(1664, 1024, num_layers=2)
(attention_lstm): LSTM(80, 1024)
(attention_layer): Attention(
(softmax): Softmax(dim=2)
(query): LinearNorm(
(linear_layer): Linear(in_features=1024, out_features=640, bias=False)
)
(key): LinearNorm(
(linear_layer): Linear(in_features=640, out_features=640, bias=False)
)
(value): LinearNorm(
(linear_layer): Linear(in_features=640, out_features=640, bias=False)
)
(v): LinearNorm(
(linear_layer): Linear(in_features=640, out_features=1, bias=False)
)
)
(dense_layer): DenseLayer(
(layers): ModuleList(
(0): LinearNorm(
(linear_layer): Linear(in_features=1024, out_features=1024, bias=True)
)
(1): LinearNorm(
(linear_layer): Linear(in_features=1024, out_features=1024, bias=True)
)
)
)
(gate_layer): LinearNorm(
(linear_layer): Linear(in_features=1664, out_features=1, bias=True)
)
)
)
)
(encoder): Encoder(
(convolutions): ModuleList(
(0): Sequential(
(0): ConvNorm(
(conv): Conv1d(512, 512, kernel_size=(5,), stride=(1,), padding=(2,))
)
(1): InstanceNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=False)
)
(1): Sequential(
(0): ConvNorm(
(conv): Conv1d(512, 512, kernel_size=(5,), stride=(1,), padding=(2,))
)
(1): InstanceNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=False)
)
(2): Sequential(
(0): ConvNorm(
(conv): Conv1d(512, 512, kernel_size=(5,), stride=(1,), padding=(2,))
)
(1): InstanceNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=False)
)
)
(lstm): LSTM(512, 256, batch_first=True, bidirectional=True)
)
)
Number of speakers : 1
Number of speakers : 1
Setting up Tensorboard log in outdir\logs
Epoch: 0
C:\Scripts\Voice\flowtron\data.py:55: WavFileWarning: Chunk (non-data) not understood, skipping it.
sampling_rate, data = read(full_path)
C:\Scripts\Voice\flowtron\data.py:56: UserWarning: The given NumPy array is not writeable, and PyTorch does not support non-writeable tensors. This means you can write to the underlying (supposedly non-writeable) NumPy array usin
g the tensor. You may want to copy the array to protect its data or make it writeable before converting it to a tensor. This type of warning will be suppressed for the rest of this program. (Triggered internally at ..\torch\csrc\utils
\tensor_numpy.cpp:180.)
return torch.from_numpy(data).float(), sampling_rate
Traceback (most recent call last):
File "C:\Scripts\Voice\flowtron\train.py", line 415, in <module>
train(n_gpus, rank, **train_config)
File "C:\Scripts\Voice\flowtron\train.py", line 281, in train
for batch in train_loader:
File "C:\Users\user\.conda\envs\tensor\lib\site-packages\torch\utils\data\dataloader.py", line 521, in __next__
data = self._next_data()
File "C:\Users\user\.conda\envs\tensor\lib\site-packages\torch\utils\data\dataloader.py", line 1203, in _next_data
return self._process_data(data)
File "C:\Users\user\.conda\envs\tensor\lib\site-packages\torch\utils\data\dataloader.py", line 1229, in _process_data
data.reraise()
File "C:\Users\user\.conda\envs\tensor\lib\site-packages\torch\_utils.py", line 425, in reraise
raise self.exc_type(msg)
IndexError: Caught IndexError in DataLoader worker process 0.
Original Traceback (most recent call last):
File "C:\Users\user\.conda\envs\tensor\lib\site-packages\torch\utils\data\_utils\worker.py", line 287, in _worker_loop
data = fetcher.fetch(index)
File "C:\Users\user\.conda\envs\tensor\lib\site-packages\torch\utils\data\_utils\fetch.py", line 44, in fetch
data = [self.dataset[idx] for idx in possibly_batched_index]
File "C:\Users\user\.conda\envs\tensor\lib\site-packages\torch\utils\data\_utils\fetch.py", line 44, in <listcomp>
data = [self.dataset[idx] for idx in possibly_batched_index]
File "C:\Scripts\Voice\flowtron\data.py", line 182, in __getitem__
attn_prior = self.compute_attention_prior(
File "C:\Scripts\Voice\flowtron\data.py", line 112, in compute_attention_prior
folder_path = audiopath.split('/')[-2]
IndexError: list index out of range