Inference with the exported RNN-T ONNX model fails
pengaoao opened this issue · 1 comments
The pretrained model was obtained from
https://github.com/espnet/espnet/tree/master/egs2/librispeech/asr1
Conformer-RNN Transducer
Environments
date: Wed Apr 27 09:30:57 EDT 2022
python version: 3.8.5 (default, Sep 4 2020, 07:30:14) [GCC 7.3.0]
espnet version: espnet 0.10.7a1
pytorch version: pytorch 1.8.1+cu102
Git hash: 21d19be00089678ca27f7fce474ef8d787689512
Commit date: Wed Mar 16 08:06:52 2022 -0400
ASR config: conf/tuning/transducer/train_conformer-rnn_transducer.yaml
Decode config: conf/tuning/transducer/decode.yaml
Pretrained model: https://huggingface.co/espnet/chai_librispeech_asr_train_conformer-rnn_transducer_raw_en_bpe5000_sp
The ONNX export script is:
from espnet2.bin.asr_inference import Speech2Text
import os
import yaml
from espnet_onnx.export import ASRModelExport
from pathlib import Path
# Create the exporter and set max_seq_len to 5000 before exporting.
m = ASRModelExport()
m.set_export_config(max_seq_len=5000)

# Parse the decoding YAML; only its "transducer_conf" entry is used below.
decode_conf_file = Path('espnet/egs2/librispeech/asr1/conf/decode_rnnt_conformer.yaml')
transducer_conf = yaml.safe_load(decode_conf_file.read_text())

# Keyword arguments for the espnet2 Speech2Text object: the trained ASR
# model, the transducer decoding options, and a language model whose weight
# is set to 0.0.
speech2text_kwargs = {
    "asr_train_config": "test/espnet_onnx2/rnnt/exp/asr_train_rnnt_conformer_ngpu4_raw_en_bpe5000_sp/config.yaml",
    "asr_model_file": "espnet_onnx2/rnnt/exp/asr_train_rnnt_conformer_ngpu4_raw_en_bpe5000_sp/valid.loss.ave_10best.pth",
    "transducer_conf": transducer_conf["transducer_conf"],
    "lm_train_config": "test/espnet_onnx2/rnnt/lm_config/config.yaml",
    "lm_file": "test/espnet_onnx2/rnnt/lm_config/17epoch.pth",
    "lm_weight": 0.0,
}
speech2text = Speech2Text(**speech2text_kwargs)

# Export the model as 'onnx_export' with quantization turned off.
m.export(speech2text, 'onnx_export', quantize=False)
The inference script is:
import librosa
from espnet_onnx import Speech2Text
#from pyacl.acl_infer import init_acl, release_acl
from tqdm import tqdm
import os,re
def findAllFile(base):
    """Yield the full path of each matching audio file found under ``base``.

    The directory tree rooted at ``base`` is walked recursively. A file is
    yielded when its name ends with ``"flac"`` and the regular expression
    below matches at the start of the name (any character, then a digit,
    then any character).

    Directories and file names are visited in sorted order so that the
    sequence of yielded paths is the same on every run.

    Args:
        base: Root directory to search.

    Yields:
        ``os.path.join(root, name)`` for every matching file.
    """
    for root, ds, fs in os.walk(base):
        # Sorting in place makes os.walk descend into sub-directories in a
        # deterministic order; os.walk itself gives no ordering guarantee.
        ds.sort()
        for f in sorted(fs):
            # NOTE(review): the pattern may originally have been r'.*\d.*'
            # (asterisks lost when the script was pasted) -- confirm.
            if re.match(r'.\d.', f) and f.endswith("flac"):
                yield os.path.join(root, f)
#init_acl(0)
#speech2text = Speech2Text(tag_name='')
# Load the exported ONNX model from the given model directory.
speech2text = Speech2Text(model_dir='/root/.cache/espnet_onnx/onnx_export/')
path = "espnet/egs2/librispeech/asr1/downloads/LibriSpeech/test-clean/"

# Write one "<file-stem> <transcript>" line per audio file found under `path`.
with open("test2.txt", 'w') as fout:
    for i in findAllFile(path):
        # Load the audio resampled to 16 kHz.
        y, sr = librosa.load(i, sr=16000)
        nbest = speech2text(y)
        # Join element 1 of the first hypothesis into one string
        # (presumably the token sequence of the best hypothesis -- confirm).
        res = "".join(nbest[0][1])
        # The identifier is the file name up to its first dot.
        fout.write('{} {}\n'.format(i.split('/')[-1].split('.')[0], res))
The error is:
root@ubuntu:/home/test/espnet_onnx2/rnnt# python3 infer.py
Traceback (most recent call last):
  File "infer.py", line 24, in <module>
    nbest = speech2text(y)
  File "/usr/local/python3.7.5/lib/python3.7/site-packages/espnet_onnx/asr/asr_model.py", line 84, in __call__
    nbest_hyps = self.beam_search(enc[0])[:1]
  File "/usr/local/python3.7.5/lib/python3.7/site-packages/espnet_onnx/asr/beam_search/beam_search_transducer.py", line 111, in __call__
    nbest_hyps = self.search_algorithm(enc_out)
  File "/usr/local/python3.7.5/lib/python3.7/site-packages/espnet_onnx/asr/beam_search/beam_search_transducer.py", line 238, in default_beam_search
    lm_tokens, max_hyp.lm_state, None
  File "/usr/local/python3.7.5/lib/python3.7/site-packages/espnet_onnx/asr/model/lms/transformer_lm.py", line 62, in score
    k: v for k, v in zip(self.enc_in_cache_names, state)
TypeError: zip argument #2 must support iteration
I found that the script runs the `score` function in espnet_onnx/asr/model/lms/transformer_lm.py with `state` set to None, which may be the reason for this error.
@pengaoao Thank you for reporting the issue.
You are correct; I think we need to add an `if`
statement here to handle the state=None case.