#!/bin/bash

# Apache2.0
# Prepared by Hong Kong University of Science and Technology (Author: Ricky Chan Ho Yin)
#
# Recipe driver: runs data preparation, feature extraction, acoustic model
# training, decoding and scoring in sequence. All paths are relative, so it
# must be started from the recipe directory.

# Load the job-dispatch settings; "$train_cmd" and "$decode_cmd" used by the
# steps below are expected to be defined there. The explicit ./ stops bash
# from searching $PATH for some other cmd.sh first.
. ./cmd.sh || exit 1

# -p keeps a re-run from failing when the directories already exist.
mkdir -p data data/train data/eval || exit 1

### Data preparation - training data and evaluation data.
### Please refer to http://kaldi.sourceforge.net/data_prep.html as well.
# Two language directories are built from two dictionaries (data/local/dict and
# data/local/dict.closelm), and each is then formatted together with its own
# language model archive into data/lang_test and data/lang_test_closelm.
# Every later stage reads these directories, so stop at the first failure.
utils/prepare_lang.sh data/local/dict "<UNK>" data/local/lang data/lang || exit 1
utils/prepare_lang.sh data/local/dict.closelm "UNKNOWNGMM" data/local/lang.closelm data/lang.closelm || exit 1
local/p1_format_data.sh data/lang data/lang_test data/local/lang/conv2_ears_16kwl.tg.gz || exit 1
local/p1_format_data.sh data/lang.closelm data/lang_test_closelm data/local/lang/close_conv_ears_16kwl.tg.gz || exit 1

### Feature extraction (training data)
# Directory passed to the MFCC and CMVN steps as their last argument.
mfccdir=mfcc
steps/make_mfcc.sh --nj 20 --cmd "$train_cmd" data/train exp/make_mfcc/train "$mfccdir" || exit 1
utils/utt2spk_to_spk2utt.pl data/train/utt2spk > data/train/spk2utt || exit 1
# Checked like the evaluation-set CMVN step below; training cannot proceed without it.
steps/compute_cmvn_stats.sh data/train exp/make_mfcc/train "$mfccdir" || exit 1

utils/fix_data_dir.sh data/train

### Feature extraction (evaluation data)
steps/make_mfcc.sh --cmd "$train_cmd" --nj 2 data/eval exp/make_mfcc/eval "$mfccdir" || exit 1
utils/utt2spk_to_spk2utt.pl data/eval/utt2spk > data/eval/spk2utt || exit 1
steps/compute_cmvn_stats.sh data/eval exp/make_mfcc/eval "$mfccdir" || exit 1

utils/fix_data_dir.sh data/eval

### We start acoustic model training here, building up from HMM-GMM
### Monophone training
steps/train_mono.sh --nj 20 --cmd "$train_cmd" data/train data/lang exp/mono0a || exit 1
# exp/mono0a_ali is the input of the triphone training below, so a failed
# alignment must stop the run instead of being silently ignored.
steps/align_si.sh --nj 30 --cmd "$train_cmd" data/train data/lang exp/mono0a exp/mono0a_ali || exit 1

### Triphone training
# exp/tri1 feeds the alignment, graph building and decoding that follow.
steps/train_deltas.sh --cmd "$train_cmd" 2500 20000 data/train data/lang exp/mono0a_ali exp/tri1 || exit 1
steps/align_si.sh --nj 30 --cmd "$train_cmd" data/train data/lang exp/tri1 exp/tri1_ali || exit 1

# Evaluation of tri1: one decoding graph and one decode per language model.
# These steps are left unchecked, as in the rest of the script's evaluation code.
utils/mkgraph.sh data/lang_test exp/tri1 exp/tri1/graph
utils/mkgraph.sh data/lang_test_closelm exp/tri1 exp/tri1/graph_closelm
steps/decode.sh --nj 2 --cmd "$decode_cmd" --config conf/decode.config exp/tri1/graph data/eval exp/tri1/decode_eval
steps/decode.sh --nj 2 --cmd "$decode_cmd" --config conf/decode.config exp/tri1/graph_closelm data/eval exp/tri1/decode_eval_closelm

### Triphone training, second pass (starts from the tri1 alignments)
steps/train_deltas.sh --cmd "$train_cmd" 2500 20000 \
  data/train data/lang exp/tri1_ali exp/tri2 || exit 1
steps/align_si.sh --nj 30 --cmd "$train_cmd" \
  data/train data/lang exp/tri2 exp/tri2_ali || exit 1

# Build both decoding graphs first (default LM, then closelm) ...
for lm in "" _closelm; do
  utils/mkgraph.sh "data/lang_test${lm}" exp/tri2 "exp/tri2/graph${lm}"
done
# ... then decode the evaluation set against each graph, in the same order.
for lm in "" _closelm; do
  steps/decode.sh --nj 2 --cmd "$decode_cmd" --config conf/decode.config \
    "exp/tri2/graph${lm}" data/eval "exp/tri2/decode_eval${lm}"
done

### Training with LDA+MLLT feature-space transformation (starts from the tri2 alignments)
steps/train_lda_mllt.sh --cmd "$train_cmd" \
  --splice-opts "--left-context=3 --right-context=3" \
  2500 20000 data/train data/lang exp/tri2_ali exp/tri3a || exit 1

# Graphs for both language models are built before any decoding starts.
for lm in "" _closelm; do
  utils/mkgraph.sh "data/lang_test${lm}" exp/tri3a "exp/tri3a/graph${lm}"
done
# Decode the evaluation set once per graph.
for lm in "" _closelm; do
  steps/decode.sh --nj 2 --cmd "$decode_cmd" --config conf/decode.config \
    "exp/tri3a/graph${lm}" data/eval "exp/tri3a/decode_eval${lm}"
done

### SAT (speaker adaptive training)
# Align the training data with the tri3a model; both SAT systems below are
# trained from this single alignment directory.
steps/align_fmllr.sh --nj 30 --cmd "$train_cmd" data/train data/lang exp/tri3a exp/tri3a_ali || exit 1
steps/train_sat.sh --cmd "$train_cmd" 4000 100000 data/train data/lang exp/tri3a_ali exp/tri4a || exit 1
# The smaller system previously pointed at exp/tri3a_ali_100k, a directory this
# script never creates; use the alignment produced above instead.
steps/train_sat.sh --cmd "$train_cmd" 2500 20000 data/train data/lang exp/tri3a_ali exp/tri4a_20k || exit 1

# Evaluate each SAT system in turn: build the two graphs (default LM, closelm),
# then decode the evaluation set against each of them.
for sys in tri4a tri4a_20k; do
  utils/mkgraph.sh data/lang_test "exp/${sys}" "exp/${sys}/graph"
  utils/mkgraph.sh data/lang_test_closelm "exp/${sys}" "exp/${sys}/graph_closelm"
  steps/decode_fmllr.sh --nj 2 --cmd "$decode_cmd" --config conf/decode.config \
    "exp/${sys}/graph" data/eval "exp/${sys}/decode_eval"
  steps/decode_fmllr.sh --nj 2 --cmd "$decode_cmd" --config conf/decode.config \
    "exp/${sys}/graph_closelm" data/eval "exp/${sys}/decode_eval_closelm"
done

### SAT (speaker adaptive training on 100K model, with better alignment)
# exp/tri4a_ali_100k is the input of the training step that follows, so a
# failed alignment must stop the run.
steps/align_fmllr.sh --nj 30 --cmd "$train_cmd" data/train data/lang exp/tri4a exp/tri4a_ali_100k || exit 1
steps/train_sat.sh --cmd "$train_cmd" 4000 100000 data/train data/lang exp/tri4a_ali_100k exp/tri5a || exit 1

# Build the two decoding graphs in parallel ...
utils/mkgraph.sh data/lang_test exp/tri5a exp/tri5a/graph &
utils/mkgraph.sh data/lang_test_closelm exp/tri5a exp/tri5a/graph_closelm &
# ... and block until both have finished: the decodes below read these graph
# directories and must not start while they are still being written.
wait

# The decodes stay in the background so the discriminative-training stages
# that follow can start right away.
steps/decode_fmllr.sh --nj 2 --cmd "$decode_cmd" --config conf/decode.config exp/tri5a/graph data/eval exp/tri5a/decode_eval &
steps/decode_fmllr.sh --nj 2 --cmd "$decode_cmd" --config conf/decode.config exp/tri5a/graph_closelm data/eval exp/tri5a/decode_eval_closelm &


### Discriminative training
## (feature-space MMI + boosted MMI)
# Chain of dependent steps: alignments -> denominator lattices -> diagonal UBM
# -> fMMI training. Each one consumes the output of an earlier one, so every
# step is checked and the run stops at the first failure.
steps/align_fmllr.sh --nj 25 --cmd "$train_cmd" data/train data/lang exp/tri5a exp/tri5a_ali_dt100k || exit 1
steps/make_denlats.sh --nj 25 --cmd "$decode_cmd" --transform-dir exp/tri5a_ali_dt100k --config conf/decode.config --sub-split 25 data/train data/lang exp/tri5a exp/tri5a_denlats_dt100k || exit 1
steps/train_diag_ubm.sh --silence-weight 0.5 --nj 25 --cmd "$train_cmd" 800 data/train data/lang exp/tri5a_ali_dt100k exp/tri5a_dubm_dt || exit 1
steps/train_mmi_fmmi.sh --learning-rate 0.005 --boost 0.1 --cmd "$train_cmd" data/train data/lang exp/tri5a_ali_dt100k exp/tri5a_dubm_dt exp/tri5a_denlats_dt100k exp/tri5a_fmmi_b0.1 || exit 1

# The decodes below take --transform-dir from exp/tri5a/decode_eval and
# exp/tri5a/decode_eval_closelm, which are written by tri5a decodes launched in
# the background earlier in this script. Make sure those jobs have finished.
wait

# Decode every fMMI iteration against both graphs, all in the background.
for n in 1 2 3 4 5 6 7 8; do
  steps/decode_fmmi.sh --nj 2 --cmd run.pl --iter "$n" --config conf/decode.config --transform-dir exp/tri5a/decode_eval exp/tri5a/graph data/eval "exp/tri5a_fmmi_b0.1/decode_eval_iter${n}" &
  steps/decode_fmmi.sh --nj 2 --cmd run.pl --iter "$n" --config conf/decode.config --transform-dir exp/tri5a/decode_eval_closelm exp/tri5a/graph_closelm data/eval "exp/tri5a_fmmi_b0.1/decode_eval_closelm_iter${n}" &
done

## (boosted MMI only)
## Remark: the alignment and lattice generation in the next two lines do not
## necessarily have to be run again, since exp/tri5a_ali_dt100k has already
## been generated.
steps/align_fmllr.sh --nj 40 --cmd "$train_cmd" \
  data/train data/lang exp/tri5a exp/tri5a_ali_100k || exit 1
steps/make_denlats.sh --nj 40 --cmd "$decode_cmd" \
  --transform-dir exp/tri5a_ali_100k --config conf/decode.config --sub-split 40 \
  data/train data/lang exp/tri5a exp/tri5a_denlats_100k || exit 1
# Alignment and denominator-lattice directories are spelled out explicitly.
steps/train_mmi.sh --cmd "$decode_cmd" --boost 0.1 \
  data/train data/lang exp/tri5a_ali_100k exp/tri5a_denlats_100k exp/tri5a_mmi_b0.1 || exit 1

# For each iteration, start a background decode per language model
# (default LM first, then closelm).
for n in 1 2 3 4; do
  for lm in "" _closelm; do
    steps/decode.sh --nj 2 --iter "$n" --cmd "$decode_cmd" --config conf/decode.config \
      --transform-dir "exp/tri5a/decode_eval${lm}" "exp/tri5a/graph${lm}" data/eval \
      "exp/tri5a_mmi_b0.1/decode_eval${lm}${n}" &
  done
done

## SGMM (subspace gaussian mixture model), excluding the "speaker-dependent weights"
# Train the UBM, then the SGMM initialised from exp/ubm5a/final.ubm.
steps/train_ubm.sh --silence-weight 0.5 --cmd "$train_cmd" 800 \
  data/train data/lang exp/tri5a_ali_dt100k exp/ubm5a || exit 1
steps/train_sgmm.sh --cmd "$train_cmd" 4500 40000 \
  data/train data/lang exp/tri5a_ali_dt100k exp/ubm5a/final.ubm exp/sgmm_5a || exit 1

# Graphs: closelm first, then the default LM.
for lm in _closelm ""; do
  utils/mkgraph.sh "data/lang_test${lm}" exp/sgmm_5a "exp/sgmm_5a/graph${lm}"
done
# Decodes, in the same order, taking --transform-dir from the matching tri5a decode directory.
for lm in _closelm ""; do
  steps/decode_sgmm.sh --nj 2 --cmd "$decode_cmd" \
    --transform-dir "exp/tri5a/decode_eval${lm}" \
    "exp/sgmm_5a/graph${lm}" data/eval "exp/sgmm_5a/decode_eval${lm}"
done

 # Boosted MMI on top of the SGMM system.
# Alignment -> denominator lattices -> MMI training: each step consumes the
# directory written by the previous one, so stop as soon as one of them fails.
steps/align_sgmm.sh --nj 25 --cmd "$train_cmd" --transform-dir exp/tri5a_ali_dt100k --use-graphs true --use-gselect true data/train data/lang exp/sgmm_5a exp/sgmm_5a_ali || exit 1
steps/make_denlats_sgmm.sh --nj 25 --sub-split 25 --cmd "$decode_cmd" --transform-dir exp/tri5a_ali_dt100k data/train data/lang exp/sgmm_5a_ali exp/sgmm_5a_denlats || exit 1
steps/train_mmi_sgmm.sh --cmd "$decode_cmd" --transform-dir exp/tri5a_ali_dt100k --boost 0.1 data/train data/lang exp/sgmm_5a_ali exp/sgmm_5a_denlats exp/sgmm_5a_mmi_b0.1 || exit 1

# Rescore the existing SGMM decode directories with each MMI iteration
# (closelm first, then the default LM); these run in the foreground.
for n in 1 2 3 4; do
  steps/decode_sgmm_rescore.sh --cmd "$decode_cmd" --iter "$n" --transform-dir exp/tri5a/decode_eval_closelm data/lang_test_closelm data/eval exp/sgmm_5a/decode_eval_closelm "exp/sgmm_5a_mmi_b0.1/decode_eval_closelm${n}"
  steps/decode_sgmm_rescore.sh --cmd "$decode_cmd" --iter "$n" --transform-dir exp/tri5a/decode_eval data/lang_test data/eval exp/sgmm_5a/decode_eval "exp/sgmm_5a_mmi_b0.1/decode_eval${n}"
done

### Neural Network (on top of LDA+MLLT+SAT model)
# All decodes below read the model in exp/nnet_8m_6l, so stop here if training
# fails rather than launching decode jobs against a missing model.
steps/train_nnet_cpu.sh --mix-up 8000 --initial-learning-rate 0.01 --final-learning-rate 0.001 --num-jobs-nnet 16 --num-hidden-layers 6 --num-parameters 8000000 --cmd "$decode_cmd" data/train data/lang exp/tri5a exp/nnet_8m_6l || exit 1

# Decoding with the final NN model (foreground), once per graph.
steps/decode_nnet_cpu.sh --cmd "$decode_cmd" --nj 2 --config conf/decode.config --transform-dir exp/tri5a/decode_eval exp/tri5a/graph data/eval exp/nnet_8m_6l/decode_eval
steps/decode_nnet_cpu.sh --cmd "$decode_cmd" --nj 2 --config conf/decode.config --transform-dir exp/tri5a/decode_eval_closelm exp/tri5a/graph_closelm data/eval exp/nnet_8m_6l/decode_eval_closelm

# Better analysis: decoding selected intermediate iterations explains why we
# need to average the parameters over the last ten iterations.
# These decodes run in the background.
for n in 290 280 270 260 250 240 230 220 210 200 150 100 50; do
  steps/decode_nnet_cpu.sh --cmd "$decode_cmd" --nj 2 --iter "$n" --config conf/decode.config --transform-dir exp/tri5a/decode_eval exp/tri5a/graph data/eval "exp/nnet_8m_6l/decode_eval_iter${n}" &
  steps/decode_nnet_cpu.sh --cmd "$decode_cmd" --nj 2 --iter "$n" --config conf/decode.config --transform-dir exp/tri5a/decode_eval_closelm exp/tri5a/graph_closelm data/eval "exp/nnet_8m_6l/decode_eval_closelm_iter${n}" &
done

# GPU based DNN training; this was run on CentOS 6.4 with CUDA 5.0.
# 6-layer DNN pretrained with restricted Boltzmann machines, frame-level
# cross-entropy training, then sequence discriminative training with the sMBR
# criterion.
# The decodes below read the models this step leaves under exp/, so stop if it fails.
local/run_dnn.sh || exit 1

# Decoding was run on CPUs.
# Decoding using the DNN with cross-entropy training.
dir=exp/tri5a_pretrain-dbn_dnn
steps/decode_nnet.sh --nj 2 --cmd "$decode_cmd" --config conf/decode_dnn.config --acwt 0.1 exp/tri5a/graph data-fmllr-tri5a/test "$dir/decode" || exit 1
steps/decode_nnet.sh --nj 2 --cmd "$decode_cmd" --config conf/decode_dnn.config --acwt 0.1 exp/tri5a/graph_closelm data-fmllr-tri5a/test "$dir/decode_closelm" || exit 1

# Decoding using the DNN with sequence discriminative training (sMBR criterion):
# one background decode per iteration and per graph, each with that
# iteration's network file.
dir=exp/tri5a_pretrain-dbn_dnn_smbr
for ITER in 1 2 3 4; do
  steps/decode_nnet.sh --nj 2 --cmd "$decode_cmd" --config conf/decode_dnn.config --nnet "$dir/${ITER}.nnet" --acwt 0.1 exp/tri5a/graph data-fmllr-tri5a/test "$dir/decode_it${ITER}" &
  steps/decode_nnet.sh --nj 2 --cmd "$decode_cmd" --config conf/decode_dnn.config --nnet "$dir/${ITER}.nnet" --acwt 0.1 exp/tri5a/graph_closelm data-fmllr-tri5a/test "$dir/decode_closelm_it${ITER}" &
done

### Scoring ###
# Several decoding passes above are started in the background with '&'.
# Block until every background job has exited so that no decode directory is
# scored while it is still being written.
wait

# GMM systems: each one is scored against the default graph and the closelm graph.
for sys in tri1 tri2 tri3a tri4a tri4a_20k tri5a; do
  local/ext/score.sh data/eval "exp/${sys}/graph" "exp/${sys}/decode_eval"
  local/ext/score.sh data/eval "exp/${sys}/graph_closelm" "exp/${sys}/decode_eval_closelm"
done

# fMMI + boosted MMI: one decode directory per iteration.
for n in 1 2 3 4 5 6 7 8; do local/ext/score.sh data/eval exp/tri5a/graph "exp/tri5a_fmmi_b0.1/decode_eval_iter${n}"; done
for n in 1 2 3 4 5 6 7 8; do local/ext/score.sh data/eval exp/tri5a/graph_closelm "exp/tri5a_fmmi_b0.1/decode_eval_closelm_iter${n}"; done

# NOTE(review): the decoding section of this script only writes the
# per-iteration directories exp/tri5a_mmi_b0.1/decode_eval<n>; confirm that the
# un-suffixed directories scored on the next two lines are produced elsewhere.
local/ext/score.sh data/eval exp/tri5a/graph exp/tri5a_mmi_b0.1/decode_eval
local/ext/score.sh data/eval exp/tri5a/graph_closelm exp/tri5a_mmi_b0.1/decode_eval_closelm

# Boosted MMI: one decode directory per iteration.
for n in 1 2 3 4; do local/ext/score.sh data/eval exp/tri5a/graph "exp/tri5a_mmi_b0.1/decode_eval${n}"; done
for n in 1 2 3 4; do local/ext/score.sh data/eval exp/tri5a/graph_closelm "exp/tri5a_mmi_b0.1/decode_eval_closelm${n}"; done

# SGMM system.
local/ext/score.sh data/eval exp/sgmm_5a/graph exp/sgmm_5a/decode_eval
local/ext/score.sh data/eval exp/sgmm_5a/graph_closelm exp/sgmm_5a/decode_eval_closelm

# Boosted MMI on SGMM: one rescored directory per iteration.
for n in 1 2 3 4; do
  local/ext/score.sh data/eval exp/sgmm_5a/graph "exp/sgmm_5a_mmi_b0.1/decode_eval${n}"
  local/ext/score.sh data/eval exp/sgmm_5a/graph_closelm "exp/sgmm_5a_mmi_b0.1/decode_eval_closelm${n}"
done

# CPU neural network: final model ...
local/ext/score.sh data/eval exp/tri5a/graph exp/nnet_8m_6l/decode_eval
local/ext/score.sh data/eval exp/tri5a/graph_closelm exp/nnet_8m_6l/decode_eval_closelm

# ... and the intermediate iterations that were decoded for analysis.
for n in 290 280 270 260 250 240 230 220 210 200 150 100 50; do
  local/ext/score.sh data/eval exp/tri5a/graph "exp/nnet_8m_6l/decode_eval_iter${n}"
  local/ext/score.sh data/eval exp/tri5a/graph_closelm "exp/nnet_8m_6l/decode_eval_closelm_iter${n}"
done

# GPU DNN, cross-entropy model.
# NOTE(review): the DNN decodes in this script read data-fmllr-tri5a/test while
# scoring uses data/eval; confirm both refer to the same utterances.
local/ext/score.sh data/eval exp/tri5a/graph exp/tri5a_pretrain-dbn_dnn/decode
local/ext/score.sh data/eval exp/tri5a/graph_closelm exp/tri5a_pretrain-dbn_dnn/decode_closelm

# GPU DNN, sMBR model: one decode directory per iteration.
for ITER in 1 2 3 4; do
  local/ext/score.sh data/eval exp/tri5a/graph "exp/tri5a_pretrain-dbn_dnn_smbr/decode_it${ITER}"
  local/ext/score.sh data/eval exp/tri5a/graph_closelm "exp/tri5a_pretrain-dbn_dnn_smbr/decode_closelm_it${ITER}"
done