mravanelli/pytorch-kaldi

Using arch_pretrain_file results in a CUDA runtime error

gezhaoDL opened this issue · 1 comment

Hi sir. Sorry to bother you again.
I am trying to use arch_pretrain_file to fine-tune the liGRU network. The final_architecture*.pkl files were trained on the AISHELL dataset. I then changed the dataset to THCHS30, and when I run run_exp.py with the new cfg file, I get an error like this.
error

Here is my cfg file.
`[cfg_proto]
cfg_proto = proto/global.proto
cfg_proto_chunk = proto/global_chunk.proto

[exp]
cmd =
run_nn_script = run_nn
out_folder = exp/THCHS30_liGRU_mfcc_change_tuning
seed = 2234
use_cuda = True
multi_gpu = False
save_gpumem = False
n_epochs_tr = 24
production = False

[dataset1]
data_name = TH_tr
fea = fea_name=mfcc
fea_lst=/home/rd/gezhao/kaldi/egs/thchs30/s5/data/mfcc/train/feats.scp
fea_opts=apply-cmvn --utt2spk=ark:/home/rd/gezhao/kaldi/egs/thchs30/s5/data/train/utt2spk ark:/home/rd/gezhao/kaldi/egs/thchs30/s5/mfcc/train/cmvn_train.ark ark:- ark:- | add-deltas --delta-order=2 ark:- ark:- |
cw_left=0
cw_right=0
lab = lab_name=lab_cd
lab_folder=/home/rd/gezhao/kaldi/egs/thchs30/s5/exp/tri3b_ali
lab_opts=ali-to-pdf
lab_count_file=auto
lab_data_folder=/home/rd/gezhao/kaldi/egs/thchs30/s5/data/mfcc/train/
lab_graph=/home/rd/gezhao/kaldi/egs/thchs30/s5/graph

    lab_name=lab_mono
    lab_folder=/home/rd/gezhao/kaldi/egs/thchs30/s5/exp/tri3b_ali
    lab_opts=ali-to-phones --per-frame=true
    lab_count_file=none
    lab_data_folder=/home/rd/gezhao/kaldi/egs/thchs30/s5/data/mfcc/train/
    lab_graph=/home/rd/gezhao/kaldi/egs/thchs30/s5/graph

n_chunks = 5

[dataset2]
data_name = TH_dev
fea = fea_name=mfcc
fea_lst=/home/rd/gezhao/kaldi/egs/thchs30/s5/data/mfcc/dev/feats.scp
fea_opts=apply-cmvn --utt2spk=ark:/home/rd/gezhao/kaldi/egs/thchs30/s5/data/dev/utt2spk ark:/home/rd/gezhao/kaldi/egs/thchs30/s5/mfcc/dev/cmvn_dev.ark ark:- ark:- | add-deltas --delta-order=2 ark:- ark:- |
cw_left=0
cw_right=0
lab = lab_name=lab_cd
lab_folder=/home/rd/gezhao/kaldi/egs/thchs30/s5/exp/tri3b_ali_dev
lab_opts=ali-to-pdf
lab_count_file=auto
lab_data_folder=/home/rd/gezhao/kaldi/egs/thchs30/s5/data/mfcc/dev/
lab_graph=/home/rd/gezhao/kaldi/egs/thchs30/s5/graph

    lab_name=lab_mono
    lab_folder=/home/rd/gezhao/kaldi/egs/thchs30/s5/exp/tri3b_ali
    lab_opts=ali-to-phones --per-frame=true
    lab_count_file=none
    lab_data_folder=/home/rd/gezhao/kaldi/egs/thchs30/s5/data/mfcc/train/
    lab_graph=/home/rd/gezhao/kaldi/egs/thchs30/s5/graph

n_chunks = 1
[dataset3]
data_name = TH_test
fea = fea_name=mfcc
fea_lst=/home/rd/gezhao/kaldi/egs/thchs30/s5/data/mfcc/test/feats.scp
fea_opts=apply-cmvn --utt2spk=ark:/home/rd/gezhao/kaldi/egs/thchs30/s5/data/test/utt2spk ark:/home/rd/gezhao/kaldi/egs/thchs30/s5/mfcc/test/cmvn_test.ark ark:- ark:- | add-deltas --delta-order=2 ark:- ark:- |
cw_left=0
cw_right=0
lab = lab_name=lab_cd
lab_folder=/home/rd/gezhao/kaldi/egs/thchs30/s5/exp/tri3b_ali_test
lab_opts=ali-to-pdf
lab_count_file=auto
lab_data_folder=/home/rd/gezhao/kaldi/egs/thchs30/s5/data/mfcc/test/
lab_graph=/home/rd/gezhao/kaldi/egs/thchs30/s5/graph
lab_name=lab_mono
lab_folder=/home/rd/gezhao/kaldi/egs/thchs30/s5/exp/tri3b_ali_test
lab_opts=ali-to-phones --per-frame=true
lab_count_file=none
lab_data_folder=/home/rd/gezhao/kaldi/egs/thchs30/s5/data/mfcc/test/
lab_graph=/home/rd/gezhao/kaldi/egs/thchs30/s5/graph
n_chunks = 1

[data_use]
train_with = TH_tr
valid_with = TH_dev
forward_with = TH_test

[batches]
batch_size_train = 8
max_seq_length_train = 1000
increase_seq_length_train = True
start_seq_len_train = 100
multply_factor_seq_len_train = 2
batch_size_valid = 8
max_seq_length_valid = 1000

[architecture1]
arch_name = liGRU_layers
arch_proto = proto/liGRU.proto
arch_library = neural_networks
arch_class = liGRU
arch_pretrain_file = /home/rd/gezhao/pytorch-kaldi/exp/aishell_liGRU_mfcc/exp_files/final_architecture1.pkl
arch_freeze = False
arch_seq_model = True
ligru_lay = 550,550,550,550,550
ligru_drop = 0.2,0.2,0.2,0.2,0.2
ligru_use_laynorm_inp = False
ligru_use_batchnorm_inp = False
ligru_use_laynorm = False,False,False,False,False
ligru_use_batchnorm = True,True,True,True,True
ligru_bidir = True
ligru_act = relu,relu,relu,relu,relu
ligru_orthinit = True
arch_lr = 0.0004
arch_halving_factor = 0.5
arch_improvement_threshold = 0.001
arch_opt = rmsprop
opt_momentum = 0.0
opt_alpha = 0.95
opt_eps = 1e-8
opt_centered = False
opt_weight_decay = 0.0

[architecture2]
arch_name = MLP_layers
arch_proto = proto/MLP.proto
arch_library = neural_networks
arch_class = MLP
arch_pretrain_file = /home/rd/gezhao/pytorch-kaldi/exp/aishell_liGRU_mfcc/exp_files/final_architecture2.pkl
arch_freeze = False
arch_seq_model = False
dnn_lay = 3000
dnn_drop = 0.0
dnn_use_laynorm_inp = False
dnn_use_batchnorm_inp = False
dnn_use_batchnorm = False
dnn_use_laynorm = False
dnn_act = softmax
arch_lr = 0.0004
arch_halving_factor = 0.5
arch_improvement_threshold = 0.001
arch_opt = rmsprop
opt_momentum = 0.0
opt_alpha = 0.95
opt_eps = 1e-8
opt_centered = False
opt_weight_decay = 0.0

[architecture3]
arch_name = MLP_layers2
arch_proto = proto/MLP.proto
arch_library = neural_networks
arch_class = MLP
arch_pretrain_file = /home/rd/gezhao/pytorch-kaldi/exp/aishell_liGRU_mfcc/exp_files/final_architecture3.pkl
arch_freeze = False
arch_seq_model = False
dnn_lay = 217
dnn_drop = 0.0
dnn_use_laynorm_inp = False
dnn_use_batchnorm_inp = False
dnn_use_batchnorm = False
dnn_use_laynorm = False
dnn_act = softmax
arch_lr = 0.0004
arch_halving_factor = 0.5
arch_improvement_threshold = 0.001
arch_opt = rmsprop
opt_momentum = 0.0
opt_alpha = 0.95
opt_eps = 1e-8
opt_centered = False
opt_weight_decay = 0.0

[model]
model_proto = proto/model.proto
model = out_dnn1=compute(liGRU_layers,mfcc)
out_dnn2=compute(MLP_layers,out_dnn1)
out_dnn3=compute(MLP_layers2,out_dnn1)
loss_mono=cost_nll(out_dnn3,lab_mono)
loss_mono_w=mult_constant(loss_mono,1.0)
loss_cd=cost_nll(out_dnn2,lab_cd)
loss_final=sum(loss_cd,loss_mono_w)
err_final=cost_err(out_dnn2,lab_cd)

[forward]
forward_out = out_dnn2
normalize_posteriors = True
normalize_with_counts_from = lab_cd
save_out_file = False
require_decoding = True

[decoding]
decoding_script_folder = kaldi_decoding_scripts/
decoding_script = decode_dnn.sh
decoding_proto = proto/decoding.proto
min_active = 200
max_active = 7000
max_mem = 50000000
beam = 13.0
latbeam = 8.0
acwt = 0.2
max_arcs = -1
skip_scoring = false
scoring_script = local/score_thchs30.sh
scoring_opts = "--min-lmwt 1 --max-lmwt 10"
norm_vars = False `

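Hey! This is related to a different number of output targets. The pretrained output layers (architecture2 and architecture3) were trained for the AISHELL targets, and the THCHS30 alignments presumably define a different number of lab_cd/lab_mono classes, so the loaded layers no longer match the new labels.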