# Set path to CUDA, NCCL
# Edit the two roots below if CUDA / NCCL are installed elsewhere.
CUDAROOT=/usr/local/cuda
NCCL_ROOT=/usr/local/nccl
# Prepend the NCCL/CUDA directories to each search path.
# ${VAR:+:$VAR} appends the previous value only when it is non-empty, so an
# unset variable does not leave a trailing ':' (an empty path entry, which the
# compiler and the dynamic loader interpret as the current directory).
export CPATH="$NCCL_ROOT/include${CPATH:+:$CPATH}"
export LD_LIBRARY_PATH="$NCCL_ROOT/lib/:$CUDAROOT/lib64${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
export LIBRARY_PATH="$NCCL_ROOT/lib/${LIBRARY_PATH:+:$LIBRARY_PATH}"
export CUDA_HOME="$CUDAROOT"
export CUDA_PATH="$CUDAROOT"
# CPATH is guaranteed non-empty here (set above), so a plain prepend is safe.
export CPATH="$CUDA_PATH/include:$CPATH" # for warp-rnnt
# Install miniconda, python libraries, and other tools
# Replace /path/to/kaldi with the location of your Kaldi checkout.
# make is chained with && so it only runs once we are actually inside tools/;
# if the cd fails, nothing is built in the wrong directory.
cd tools && make KALDI=/path/to/kaldi
- AMI
- CSJ
- Librispeech
- Switchboard (+ Fisher)
- TEDLIUM2
- TEDLIUM3
- TIMIT
- WSJ
- Penn Tree Bank
- WikiText2
- CNN encoder
- (Bidirectional/unidirectional) LSTM encoder
- CNN+(bidirectional/unidirectional) LSTM encoder
- Self-attention (Transformer) encoder [link]
- Time-Depth Separable (TDS) convolutional encoder [link]
- Gated CNN encoder (GLU) [link]
- Shallow fusion
- RNN decoder
- Transformer decoder
- RNN transducer [link]
- RNNLM (recurrent neural network language model)
- Gated convolutional LM [link]
- Transformer LM
- Phoneme
- Grapheme
- Wordpiece (BPE, sentencepiece)
- Word
- Word-char mix
Multi-task learning (MTL) with different units is supported to alleviate data sparseness.
- Hybrid CTC/attention [link]
- Hierarchical Attention (e.g., word attention + character attention) [link]
- Hierarchical CTC (e.g., word CTC + character CTC) [link]
- Hierarchical CTC+Attention (e.g., word attention + character CTC) [link]
- Forward-backward attention [link]
- RNNLM objective [link]
model | test_dev93 | test_eval92 |
---|---|---|
BPE1k LAS + CTC + RNNLM | 8.8 | 6.2 |
model | eval1 | eval2 | eval3 |
---|---|---|---|
BPE10k LAS + char CTC init. + add 2L + RNNLM | 7.4/5.8 | 5.7/4.5 | 6.0/4.5 |
model | SWB | CH |
---|---|---|
BPE10k LAS + RNNLM | 11.1 | 22.2 |
model | dev-clean | dev-other | test-clean | test-other |
---|---|---|---|---|
BPE30k LAS + RNNLM | 3.6 | 11.2 | 3.9 | 12.2 |
model | dev | test |
---|---|---|
BPE10k LAS + RNNLM | 10.9 | 11.2 |
model | valid | test |
---|---|---|
RNNLM | 87.99 | 86.06 |
+ cache=100 | 79.58 | 79.12 |
+ cache=500 | 77.36 | 76.94 |
model | valid | test |
---|---|---|
RNNLM | 104.53 | 98.73 |
+ cache=100 | 90.86 | 85.87 |
+ cache=2000 | 76.10 | 72.77 |