Commit d13a8ae8 authored by Dan Povey

sandbox/dan2: Committing mostly script changes relating to my neural net setup.

git-svn-id: https://svn.code.sf.net/p/kaldi/code/sandbox/dan2@2819 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
parent abad6ae2
@@ -3,119 +3,98 @@ for x in exp/*/decode*; do [ -d $x ] && grep WER $x/wer_* | utils/best_wer.sh; d
exit 0
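# Each line below is as printed by utils/best_wer.sh:
#   %WER <percent> [ <errors> / <total-words>, <N> ins, <N> del, <N> sub ] <decode-dir>/wer_<LM-weight>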
# Monophone, MFCC+delta+accel
%WER 8.82 [ 1106 / 12533, 132 ins, 249 del, 725 sub ] exp/mono/decode/wer_3
%WER 8.58 [ 1075 / 12533, 137 ins, 230 del, 708 sub ] exp/mono/decode/wer_2
# MFCC+delta+accel
%WER 3.27 [ 410 / 12533, 59 ins, 85 del, 266 sub ] exp/tri1/decode/wer_6
%WER 3.41 [ 428 / 12533, 53 ins, 94 del, 281 sub ] exp/tri1/decode/wer_6
# MFCC+delta+accel (on top of better alignments, but didn't help).
%WER 3.23 [ 405 / 12533, 60 ins, 84 del, 261 sub ] exp/tri2a/decode/wer_6
# MFCC+delta+accel (on top of better alignments)
%WER 3.26 [ 409 / 12533, 54 ins, 87 del, 268 sub ] exp/tri2a/decode/wer_6
# LDA+MLLT
%WER 3.08 [ 386 / 12533, 41 ins, 89 del, 256 sub ] exp/tri2b/decode/wer_8
%WER 2.78 [ 349 / 12533, 52 ins, 66 del, 231 sub ] exp/tri2b/decode/wer_5
# Some MMI/MPE experiments (MMI, boosted MMI, MPE) on top of the LDA+MLLT system.
%WER 2.59 [ 325 / 12533, 49 ins, 55 del, 221 sub ] exp/tri2b_mmi_b0.05/decode_it3/wer_6
%WER 2.65 [ 332 / 12533, 55 ins, 48 del, 229 sub ] exp/tri2b_mmi_b0.05/decode_it4/wer_6
%WER 2.53 [ 317 / 12533, 42 ins, 57 del, 218 sub ] exp/tri2b_mmi/decode_it3/wer_7
%WER 2.67 [ 335 / 12533, 54 ins, 49 del, 232 sub ] exp/tri2b_mmi/decode_it4/wer_6
%WER 2.90 [ 364 / 12533, 48 ins, 68 del, 248 sub ] exp/tri2b_mpe/decode_it3/wer_7
%WER 2.88 [ 361 / 12533, 39 ins, 73 del, 249 sub ] exp/tri2b_mpe/decode_it4/wer_8
%WER 2.54 [ 318 / 12533, 57 ins, 45 del, 216 sub ] exp/tri2b_mmi/decode_it3/wer_5
%WER 2.66 [ 333 / 12533, 53 ins, 64 del, 216 sub ] exp/tri2b_mmi/decode_it4/wer_7
%WER 2.51 [ 314 / 12533, 58 ins, 45 del, 211 sub ] exp/tri2b_mmi_b0.05/decode_it3/wer_5
%WER 2.55 [ 319 / 12533, 56 ins, 54 del, 209 sub ] exp/tri2b_mmi_b0.05/decode_it4/wer_6
%WER 2.50 [ 313 / 12533, 39 ins, 65 del, 209 sub ] exp/tri2b_mpe/decode_it3/wer_7
%WER 2.50 [ 313 / 12533, 44 ins, 56 del, 213 sub ] exp/tri2b_mpe/decode_it4/wer_6
# LDA+MLLT+SAT
%WER 3.27 [ 410 / 12533, 37 ins, 113 del, 260 sub ] exp/tri3b/decode.si/wer_8
%WER 2.04 [ 256 / 12533, 34 ins, 47 del, 175 sub ] exp/tri3b/decode/wer_4
%WER 1.90 [ 238 / 12533, 26 ins, 50 del, 162 sub ] exp/tri3b/decode/wer_4
%WER 2.96 [ 371 / 12533, 54 ins, 62 del, 255 sub ] exp/tri3b/decode.si/wer_4 # This is the speaker-independent decoding pass.
# LDA+MLLT+SAT (on training set)
%WER 0.88 [ 307 / 34722, 44 ins, 67 del, 196 sub ] exp/tri3b/decode_train/wer_4
# Decoding tri3b with the unigram language model, which gives higher WER.
%WER 10.27 [ 1287 / 12533, 119 ins, 205 del, 963 sub ] exp/tri3b/decode_ug/wer_13
%WER 13.47 [ 1688 / 12533, 172 ins, 258 del, 1258 sub ] exp/tri3b/decode_ug.si/wer_12
# LDA+MLLT+SAT+MMI (on training set)
%WER 0.32 [ 112 / 34722, 12 ins, 26 del, 74 sub ] exp/tri3b_mmi/decode_train/wer_7
# LDA+MLLT+SAT+MMI (MMI on top of the SAT system)
%WER 3.27 [ 410 / 12533, 37 ins, 113 del, 260 sub ] exp/tri3b_mmi/decode.si/wer_8
%WER 1.87 [ 234 / 12533, 33 ins, 44 del, 157 sub ] exp/tri3b_mmi/decode/wer_6
%WER 2.96 [ 371 / 12533, 54 ins, 62 del, 255 sub ] exp/tri3b_mmi/decode.si/wer_4
%WER 1.73 [ 217 / 12533, 20 ins, 45 del, 152 sub ] exp/tri3b_mmi/decode/wer_7
%WER 1.84 [ 231 / 12533, 27 ins, 41 del, 163 sub ] exp/tri3b_mmi/decode2/wer_7 # with transforms from tri3b
# LDA+MLLT+SAT+fMMI (fMMI+MMI on top of the SAT system). Various configurations.
# Note: it doesn't really help here. Probably not enough data.
%WER 1.89 [ 237 / 12533, 30 ins, 41 del, 166 sub ] exp/tri3b_fmmi_b/decode_it3/wer_7
%WER 2.13 [ 267 / 12533, 36 ins, 47 del, 184 sub ] exp/tri3b_fmmi_b/decode_it4/wer_8
%WER 2.11 [ 265 / 12533, 39 ins, 38 del, 188 sub ] exp/tri3b_fmmi_b/decode_it5/wer_7
%WER 2.18 [ 273 / 12533, 42 ins, 35 del, 196 sub ] exp/tri3b_fmmi_b/decode_it6/wer_7
%WER 2.29 [ 287 / 12533, 48 ins, 35 del, 204 sub ] exp/tri3b_fmmi_b/decode_it7/wer_7
%WER 2.51 [ 314 / 12533, 53 ins, 45 del, 216 sub ] exp/tri3b_fmmi_b/decode_it8/wer_8
%WER 1.87 [ 234 / 12533, 38 ins, 36 del, 160 sub ] exp/tri3b_fmmi_c/decode_it3/wer_4
%WER 1.90 [ 238 / 12533, 36 ins, 37 del, 165 sub ] exp/tri3b_fmmi_c/decode_it4/wer_5
%WER 1.82 [ 228 / 12533, 36 ins, 33 del, 159 sub ] exp/tri3b_fmmi_c/decode_it5/wer_4
%WER 1.76 [ 220 / 12533, 34 ins, 31 del, 155 sub ] exp/tri3b_fmmi_c/decode_it6/wer_5
%WER 1.81 [ 227 / 12533, 27 ins, 42 del, 158 sub ] exp/tri3b_fmmi_c/decode_it7/wer_8
%WER 1.82 [ 228 / 12533, 36 ins, 30 del, 162 sub ] exp/tri3b_fmmi_c/decode_it8/wer_5
%WER 1.99 [ 250 / 12533, 33 ins, 43 del, 174 sub ] exp/tri3b_fmmi_d/decode_it3/wer_7
%WER 2.12 [ 266 / 12533, 30 ins, 55 del, 181 sub ] exp/tri3b_fmmi_d/decode_it4/wer_9
%WER 2.08 [ 261 / 12533, 41 ins, 37 del, 183 sub ] exp/tri3b_fmmi_d/decode_it5/wer_6
%WER 2.16 [ 271 / 12533, 37 ins, 50 del, 184 sub ] exp/tri3b_fmmi_d/decode_it6/wer_7
%WER 2.22 [ 278 / 12533, 38 ins, 47 del, 193 sub ] exp/tri3b_fmmi_d/decode_it7/wer_7
%WER 2.35 [ 294 / 12533, 46 ins, 48 del, 200 sub ] exp/tri3b_fmmi_d/decode_it8/wer_7
# SGMM experiments.
%WER 1.69 [ 212 / 12533, 36 ins, 37 del, 139 sub ] exp/sgmm4a/decode/wer_3
%WER 1.70 [ 213 / 12533, 35 ins, 36 del, 142 sub ] exp/sgmm4a/decode_fmllr/wer_3
%WER 1.56 [ 196 / 12533, 30 ins, 34 del, 132 sub ] exp/sgmm4a_mmi_b0.2/decode_it1/wer_5
%WER 1.54 [ 193 / 12533, 31 ins, 32 del, 130 sub ] exp/sgmm4a_mmi_b0.2/decode_it2/wer_5
%WER 1.57 [ 197 / 12533, 31 ins, 31 del, 135 sub ] exp/sgmm4a_mmi_b0.2/decode_it3/wer_5
%WER 1.58 [ 198 / 12533, 32 ins, 32 del, 134 sub ] exp/sgmm4a_mmi_b0.2/decode_it4/wer_5
# Note: it doesn't really help here. Probably not enough data.
%WER 1.68 [ 210 / 12533, 26 ins, 35 del, 149 sub ] exp/tri3b_fmmi_b/decode_it3/wer_6
%WER 1.84 [ 231 / 12533, 35 ins, 31 del, 165 sub ] exp/tri3b_fmmi_b/decode_it4/wer_5
%WER 1.80 [ 226 / 12533, 31 ins, 35 del, 160 sub ] exp/tri3b_fmmi_b/decode_it5/wer_6
%WER 1.91 [ 239 / 12533, 39 ins, 35 del, 165 sub ] exp/tri3b_fmmi_b/decode_it6/wer_6
%WER 2.01 [ 252 / 12533, 20 ins, 52 del, 180 sub ] exp/tri3b_fmmi_b/decode_it7/wer_10
%WER 2.09 [ 262 / 12533, 33 ins, 43 del, 186 sub ] exp/tri3b_fmmi_b/decode_it8/wer_8
%WER 1.80 [ 226 / 12533, 30 ins, 38 del, 158 sub ] exp/tri3b_fmmi_c/decode_it3/wer_4
%WER 1.73 [ 217 / 12533, 28 ins, 38 del, 151 sub ] exp/tri3b_fmmi_c/decode_it4/wer_5
%WER 1.69 [ 212 / 12533, 24 ins, 38 del, 150 sub ] exp/tri3b_fmmi_c/decode_it5/wer_6
%WER 1.71 [ 214 / 12533, 24 ins, 37 del, 153 sub ] exp/tri3b_fmmi_c/decode_it6/wer_6
%WER 1.79 [ 224 / 12533, 31 ins, 37 del, 156 sub ] exp/tri3b_fmmi_c/decode_it7/wer_6
%WER 1.80 [ 226 / 12533, 37 ins, 31 del, 158 sub ] exp/tri3b_fmmi_c/decode_it8/wer_4
%WER 1.87 [ 234 / 12533, 20 ins, 45 del, 169 sub ] exp/tri3b_fmmi_d/decode_it3/wer_7
%WER 2.11 [ 265 / 12533, 29 ins, 47 del, 189 sub ] exp/tri3b_fmmi_d/decode_it4/wer_6
%WER 2.20 [ 276 / 12533, 37 ins, 48 del, 191 sub ] exp/tri3b_fmmi_d/decode_it5/wer_5
%WER 2.15 [ 270 / 12533, 17 ins, 69 del, 184 sub ] exp/tri3b_fmmi_d/decode_it6/wer_10
%WER 2.12 [ 266 / 12533, 14 ins, 71 del, 181 sub ] exp/tri3b_fmmi_d/decode_it7/wer_12
%WER 2.25 [ 282 / 12533, 17 ins, 65 del, 200 sub ] exp/tri3b_fmmi_d/decode_it8/wer_11
# Some "SGMM2" experiments. SGMM2 is a new version of the code that
# has tying of the substates a bit like "state-clustered tied mixture" systems;
# and which has speaker-dependent mixture weights.
# we don't any longer show the old SGMM results, although the script is still
# there, commented out.
%WER 1.63 [ 204 / 12533, 19 ins, 40 del, 145 sub ] exp/sgmm2_4a/decode/wer_5
%WER 1.65 [ 207 / 12533, 26 ins, 33 del, 148 sub ] exp/sgmm2_4a/decode_fmllr/wer_3
%WER 1.50 [ 188 / 12533, 18 ins, 33 del, 137 sub ] exp/sgmm2_4a_mmi_b0.2/decode_it1/wer_6
%WER 1.49 [ 187 / 12533, 18 ins, 32 del, 137 sub ] exp/sgmm2_4a_mmi_b0.2/decode_it2/wer_6
%WER 1.48 [ 186 / 12533, 21 ins, 27 del, 138 sub ] exp/sgmm2_4a_mmi_b0.2/decode_it3/wer_5
%WER 1.47 [ 184 / 12533, 21 ins, 26 del, 137 sub ] exp/sgmm2_4a_mmi_b0.2/decode_it4/wer_5
%WER 1.58 [ 198 / 12533, 22 ins, 33 del, 143 sub ] exp/sgmm2_4a/decode/wer_4
%WER 1.60 [ 200 / 12533, 27 ins, 29 del, 144 sub ] exp/sgmm2_4a/decode_fmllr/wer_3
%WER 1.50 [ 188 / 12533, 24 ins, 24 del, 140 sub ] exp/sgmm2_4a_mmi_b0.2/decode_it1/wer_4
%WER 1.47 [ 184 / 12533, 27 ins, 21 del, 136 sub ] exp/sgmm2_4a_mmi_b0.2/decode_it2/wer_3
%WER 1.44 [ 181 / 12533, 26 ins, 22 del, 133 sub ] exp/sgmm2_4a_mmi_b0.2/decode_it3/wer_3
%WER 1.44 [ 181 / 12533, 31 ins, 17 del, 133 sub ] exp/sgmm2_4a_mmi_b0.2/decode_it4/wer_2
# This tests the option "--zero-if-disjoint true" to MMI; no clear difference here.
%WER 1.50 [ 188 / 12533, 27 ins, 22 del, 139 sub ] exp/sgmm2_4a_mmi_b0.2_x/decode_it1/wer_3
%WER 1.49 [ 187 / 12533, 18 ins, 32 del, 137 sub ] exp/sgmm2_4a_mmi_b0.2_x/decode_it2/wer_6
%WER 1.48 [ 186 / 12533, 20 ins, 26 del, 140 sub ] exp/sgmm2_4a_mmi_b0.2_x/decode_it3/wer_5
%WER 1.45 [ 182 / 12533, 20 ins, 26 del, 136 sub ] exp/sgmm2_4a_mmi_b0.2_x/decode_it4/wer_5
# Note: sgmm2x is sgmm2 excluding the speaker-dependent mixture weights;
# it doesn't make much difference.
%WER 1.63 [ 204 / 12533, 23 ins, 37 del, 144 sub ] exp/sgmm2x_4a/decode/wer_4
%WER 1.59 [ 199 / 12533, 22 ins, 37 del, 140 sub ] exp/sgmm2x_4a/decode_fmllr/wer_4
%WER 1.46 [ 183 / 12533, 24 ins, 24 del, 135 sub ] exp/sgmm2x_4a_mmi_b0.2/decode_it1/wer_4
%WER 1.44 [ 181 / 12533, 25 ins, 22 del, 134 sub ] exp/sgmm2x_4a_mmi_b0.2/decode_it2/wer_4
%WER 1.44 [ 180 / 12533, 26 ins, 21 del, 133 sub ] exp/sgmm2x_4a_mmi_b0.2/decode_it3/wer_4
%WER 1.43 [ 179 / 12533, 26 ins, 21 del, 132 sub ] exp/sgmm2x_4a_mmi_b0.2/decode_it4/wer_4
# Deep neural net -- hybrid system.
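# (tri4a1_mmi_a through _g are MMI variants differing in settings such as the
# initial learning rate and the --boost and --E values.)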
%WER 1.89 [ 237 / 12533, 28 ins, 62 del, 147 sub ] exp/tri4a1_nnet/decode/wer_6
%WER 0.59 [ 204 / 34722, 23 ins, 50 del, 131 sub ] exp/tri4a1_nnet/decode_train/wer_2
%WER 1.87 [ 234 / 12533, 34 ins, 45 del, 155 sub ] exp/tri4a1_mmi_a/decode/wer_4
%WER 1.85 [ 232 / 12533, 28 ins, 52 del, 152 sub ] exp/tri4a1_mmi_b/decode/wer_5
%WER 1.87 [ 234 / 12533, 28 ins, 52 del, 154 sub ] exp/tri4a1_mmi_c/decode/wer_5
%WER 0.51 [ 178 / 34722, 25 ins, 34 del, 119 sub ] exp/tri4a1_mmi_c/decode_train/wer_2
%WER 1.81 [ 227 / 12533, 26 ins, 49 del, 152 sub ] exp/tri4a1_mmi_d/decode/wer_5
%WER 0.54 [ 187 / 34722, 30 ins, 28 del, 129 sub ] exp/tri4a1_mmi_d/decode_train/wer_2
%WER 1.84 [ 231 / 12533, 39 ins, 41 del, 151 sub ] exp/tri4a1_mmi_e/decode/wer_4
%WER 0.51 [ 178 / 34722, 19 ins, 38 del, 121 sub ] exp/tri4a1_mmi_e/decode_train/wer_4
%WER 1.81 [ 227 / 12533, 37 ins, 37 del, 153 sub ] exp/tri4a1_mmi_e2/decode/wer_4
%WER 0.53 [ 184 / 34722, 14 ins, 47 del, 123 sub ] exp/tri4a1_mmi_e2/decode_train/wer_6
%WER 8.48 [ 1063 / 12533, 238 ins, 146 del, 679 sub ] exp/tri4a1_mmi_f/decode/wer_9
%WER 4.84 [ 1679 / 34722, 506 ins, 192 del, 981 sub ] exp/tri4a1_mmi_f/decode_train/wer_9
%WER 2.07 [ 260 / 12533, 51 ins, 29 del, 180 sub ] exp/tri4a1_mmi_g/decode/wer_3
%WER 0.61 [ 211 / 34722, 33 ins, 34 del, 144 sub ] exp/tri4a1_mmi_g/decode_train/wer_4
# Deep neural net -- various types of hybrid system.
%WER 2.02 [ 253 / 12533, 27 ins, 64 del, 162 sub ] exp/nnet4a/decode/wer_4
%WER 9.77 [ 1224 / 12533, 95 ins, 251 del, 878 sub ] exp/nnet4a/decode_ug/wer_9
%WER 1.84 [ 231 / 12533, 23 ins, 55 del, 153 sub ] exp/nnet4b/decode/wer_5
%WER 9.04 [ 1133 / 12533, 110 ins, 153 del, 870 sub ] exp/nnet4b/decode_ug/wer_7
%WER 1.71 [ 214 / 12533, 23 ins, 46 del, 145 sub ] exp/nnet4c/decode/wer_4
%WER 9.02 [ 1130 / 12533, 91 ins, 181 del, 858 sub ] exp/nnet4c/decode_ug/wer_8
# DNN systems (Karel)
# Note from Dan: these are from an older RESULTS file, as I did not rerun them
# the last time I regenerated this file.
# Per-frame cross-entropy training
%WER 1.58 [ 198 / 12533, 28 ins, 44 del, 126 sub ] exp/tri3b_pretrain-dbn_dnn/decode/wer_3
# Sequence-based sMBR training
@@ -128,14 +107,8 @@ exit 0
# Some system combination experiments.
%WER 3.02 [ 378 / 12533, 58 ins, 68 del, 252 sub ] exp/combine_1_2a/decode/wer_4
%WER 1.64 [ 206 / 12533, 29 ins, 35 del, 142 sub ] exp/combine_4a_3b/decode/wer_2
%WER 1.60 [ 201 / 12533, 29 ins, 39 del, 133 sub ] exp/combine_4a_3b_fmmic5/decode/wer_4
%WER 1.58 [ 198 / 12533, 34 ins, 30 del, 134 sub ] exp/combine_4a_mmi_3b_fmmic5/decode/wer_3
%WER 1.58 [ 198 / 12533, 24 ins, 33 del, 141 sub ] exp/combine_sgmm2_4a_3b/decode/wer_2
%WER 1.54 [ 193 / 12533, 20 ins, 40 del, 133 sub ] exp/combine_sgmm2_4a_3b_fmmic5/decode/wer_5
%WER 1.49 [ 187 / 12533, 22 ins, 33 del, 132 sub ] exp/combine_sgmm2_4a_mmi_3b_fmmic5/decode/wer_5
%WER 1.60 [ 200 / 12533, 26 ins, 34 del, 140 sub ] exp/combine_sgmm2x_4a_3b/decode/wer_2
%WER 1.51 [ 189 / 12533, 23 ins, 34 del, 132 sub ] exp/combine_sgmm2x_4a_3b_fmmic5/decode/wer_4
%WER 1.48 [ 186 / 12533, 24 ins, 29 del, 133 sub ] exp/combine_sgmm2x_4a_mmi_3b_fmmic5/decode/wer_4
%WER 3.18 [ 398 / 12533, 60 ins, 75 del, 263 sub ] exp/combine_1_2a/decode/wer_4
%WER 1.56 [ 196 / 12533, 27 ins, 32 del, 137 sub ] exp/combine_sgmm2_4a_3b/decode/wer_2
%WER 1.53 [ 192 / 12533, 23 ins, 30 del, 139 sub ] exp/combine_sgmm2_4a_3b_fmmic5/decode/wer_4
%WER 1.47 [ 184 / 12533, 23 ins, 27 del, 134 sub ] exp/combine_sgmm2_4a_mmi_3b_fmmic5/decode/wer_4
@@ -7,20 +7,20 @@
( steps/nnet2/train_tanh.sh --splice-width 7 \
--cleanup false \
--initial-learning-rate 0.08 --final-learning-rate 0.008 \
--initial-learning-rate 0.02 --final-learning-rate 0.004 \
--num-hidden-layers 2 \
--num-epochs-extra 10 --add-layers-period 1 \
--mix-up 4000 \
--cmd "$decode_cmd" \
--hidden-layer-dim 375 \
data/train data/lang exp/tri3c_ali exp/nnet4a || exit 1
data/train data/lang exp/tri3c_ali exp/nnet4a2 || exit 1
steps/nnet2/decode.sh --config conf/decode.config --cmd "$decode_cmd" --nj 20 --feat-type raw \
--transform-dir exp/tri3c/decode \
exp/tri3c/graph data/test exp/nnet4a/decode
exp/tri3c/graph data/test exp/nnet4a2/decode
steps/nnet2/decode.sh --config conf/decode.config --cmd "$decode_cmd" --nj 20 --feat-type raw \
--transform-dir exp/tri3c/decode_ug \
exp/tri3c/graph_ug data/test exp/nnet4a/decode_ug
exp/tri3c/graph_ug data/test exp/nnet4a2/decode_ug
)
#!/bin/bash
stage=0
train_stage=-100
# This trains on only unadapted (just cepstral-mean-normalized) features,
# and uses various combinations of VTLN warping factor and time-warping
# factor to artificially expand the amount of data.
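# (Sketch of the idea, not the exact commands below: for each combination of a
# VTLN warp factor, e.g. 0.9/1.0/1.1, and a time-warp factor, a warped copy of
# the training data is generated, and the copies are combined into one larger
# training set.)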
. cmd.sh
. utils/parse_options.sh # to parse the --stage option, if given
[ $# != 0 ] && echo "Usage: local/run_4b.sh [--stage <stage> --train-stage <train-stage>]" && exit 1;
set -e
if false; then
if [ $stage -le 0 ]; then
# Create the training data.
featdir=`pwd`/mfcc/nnet4b; mkdir -p $featdir
mkdir -p conf/nnet4b
all_fbankdirs=""
@@ -53,8 +61,8 @@ if false; then
fi
( steps/nnet2/train_block.sh --stage -3 \
if [ $stage -le 1 ]; then
steps/nnet2/train_block.sh --stage "$train_stage" \
--bias-stddev 0.5 --splice-width 7 --egs-opts "--feat-type raw" \
--softmax-learning-rate-factor 0.5 --cleanup false \
--initial-learning-rate 0.04 --final-learning-rate 0.004 \
@@ -63,10 +71,30 @@ fi
--cmd "$decode_cmd" \
--hidden-layer-dim 450 \
data/nnet4b/train_fbank_all data/lang exp/tri3b_ali_nnet4b exp/nnet4b || exit 1
fi
if [ $stage -le 2 ]; then
# Create the testing data.
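# (For each test set, and for train: copy the data dir, remove any stale split*
# subdirectories, then compute 40-dim filterbank features and CMVN stats into
# data/${x}_fbank.)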
featdir=`pwd`/mfcc
mkdir -p $featdir
fbank_conf=conf/fbank_40.conf
echo "--num-mel-bins=40" > $fbank_conf
for x in test_mar87 test_oct87 test_feb89 test_oct89 test_feb91 test_sep92 train; do
cp -rT data/$x data/${x}_fbank
rm -r data/${x}_fbank/split* || true
steps/make_fbank.sh --fbank-config "$fbank_conf" --nj 8 \
--cmd "run.pl" data/${x}_fbank exp/make_fbank/$x $featdir || exit 1;
steps/compute_cmvn_stats.sh data/${x}_fbank exp/make_fbank/$x $featdir || exit 1;
done
utils/combine_data.sh data/test_fbank data/test_{mar87,oct87,feb89,oct89,feb91,sep92}_fbank
steps/compute_cmvn_stats.sh data/test_fbank exp/make_fbank/test $featdir
fi
if [ $stage -le 3 ]; then
steps/nnet2/decode.sh --config conf/decode.config --cmd "$decode_cmd" --nj 20 --feat-type raw \
exp/tri3b/graph data/test_fbank exp/nnet4b/decode
steps/nnet2/decode.sh --config conf/decode.config --cmd "$decode_cmd" --nj 20 --feat-type raw \
exp/tri3b/graph_ug data/test_fbank exp/nnet4b/decode_ug
)
fi
#!/bin/bash
# This is neural net training on top of adapted 40-dimensional features.
#
. cmd.sh
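# Train a 2-hidden-layer tanh network on top of the fMLLR features from tri3b,
# then decode using the fMLLR transforms from the tri3b decodes.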
( steps/nnet2/train_tanh.sh --num-epochs 20 \
--num-epochs-extra 10 --add-layers-period 1 \
--num-hidden-layers 2 \
--mix-up 4000 \
--initial-learning-rate 0.02 --final-learning-rate 0.004 \
--cmd "$decode_cmd" \
--hidden-layer-dim 375 \
data/train data/lang exp/tri3b_ali exp/nnet4c_nnet
steps/decode_nnet_cpu.sh --config conf/decode.config --cmd "$decode_cmd" --nj 20 \
--transform-dir exp/tri3b/decode \
exp/tri3b/graph data/test exp/nnet4c_nnet/decode
steps/decode_nnet_cpu.sh --config conf/decode.config --cmd "$decode_cmd" --nj 20 \
--transform-dir exp/tri3b/decode_ug \
exp/tri3b/graph_ug data/test exp/nnet4c_nnet/decode_ug
)
#!/bin/bash
# "nnet2" is the new name for what used to be called the "nnet-cpu" code, and this
# script will eventually supersede run_nnet_cpu.sh [It's Dan's version of neural
# network training].
# We start from tri3c, which is "raw-fMLLR" (a model with regular LDA+MLLT, but where
# the fMLLR is done in the space of the original features).
# This example runs on top of "raw-fMLLR" features:
local/nnet2/run_4a.sh
. cmd.sh
# This one is on top of filter-bank features, with only CMN.
local/nnet2/run_4b.sh
# The first training run uses a small hidden-layer dim and only a few epochs, just to
# get a good starting point to optimize from.
steps/nnet2/train_tanh.sh --num-epochs 4 --num-epochs-extra 2 --splice-width 7 \
--cleanup false \
--num-hidden-layers 3 --hidden-layer-dim 256 --add-layers-period 1 --cmd "$decode_cmd" \
data/train data/lang exp/tri3c_ali exp/nnet4c1
# This one is on top of 40-dim + fMLLR features
local/nnet2/run_4c.sh
steps/nnet2/decode.sh --config conf/decode.config --cmd "$decode_cmd" --nj 20 \
--transform-dir exp/tri3c/decode \
exp/tri3c/graph data/test exp/nnet4c1/decode
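# Widen the hidden layers of the small nnet4c1 model (--widen 400) and continue
# training it for more epochs, reusing its existing examples (egs).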
steps/nnet2/retrain_tanh.sh --num-epochs 10 --num-epochs-extra 10 \
--initial-learning-rate 0.08 --final-learning-rate 0.008 \
--widen 400 --cmd "$decode_cmd" exp/nnet4c1/egs exp/nnet4c1 exp/nnet5c1
steps/nnet2/decode.sh --config conf/decode.config --cmd "$decode_cmd" --nj 20 \
--transform-dir exp/tri3c/decode \
exp/tri3c/graph data/test exp/nnet5c1/decode
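# Continue training with --mix-up 4000, which adds extra "mixture" components to
# the softmax output layer (analogous to Gaussian mixing-up).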
steps/nnet2/retrain_tanh.sh --num-epochs 10 --num-epochs-extra 10 \
--mix-up 4000 --initial-learning-rate 0.08 --final-learning-rate 0.008 \
--cmd "$decode_cmd" exp/nnet4c1/egs exp/nnet5c1 exp/nnet6c1
steps/nnet2/decode.sh --config conf/decode.config --cmd "$decode_cmd" --nj 20 \
--transform-dir exp/tri3c/decode \
exp/tri3c/graph data/test exp/nnet6c1/decode
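# Re-align the training data with the current net, dump new training examples
# from those alignments, and train for a few more epochs on them.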
steps/nnet2/align.sh --transform-dir exp/tri3c --nj 8 \
--cmd "$decode_cmd" \
data/train data/lang exp/nnet6c1 exp/nnet6c1_ali
steps/nnet2/get_egs.sh --cmd "$decode_cmd" --splice-width 7 \
--transform-dir exp/tri3c/ \
data/train data/lang exp/nnet6c1_ali exp/nnet6c1_realigned_egs
steps/nnet2/retrain_tanh.sh --num-epochs 5 --num-epochs-extra 10 \
--initial-learning-rate 0.04 --final-learning-rate 0.008 \
--cmd "$decode_cmd" exp/nnet6c1_realigned_egs/egs exp/nnet6c1 exp/nnet7c1
steps/nnet2/decode.sh --config conf/decode.config --cmd "$decode_cmd" --nj 20 \
--transform-dir exp/tri3c/decode \
exp/tri3c/graph data/test exp/nnet7c1/decode
steps/nnet2/decode.sh --config conf/decode.config --cmd "$decode_cmd" --nj 20 \
--transform-dir exp/tri3c/decode_ug \
exp/tri3c/graph_ug data/test exp/nnet7c1/decode_ug
exit 0;
# using conf/decode.config as we need much larger beams for RM.
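# Make denominator lattices with the tri4a1_nnet model, then run MMI training on
# top of it; the _a.._g directories below are different MMI configurations.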
steps/make_denlats_nnet_cpu.sh --nj 8 \
--config conf/decode.config --transform-dir exp/tri3b_ali \
data/train data/lang exp/tri4a1_nnet exp/tri4a1_denlats
steps/train_nnet_cpu_mmi.sh --cmd "$decode_cmd" --transform-dir exp/tri3b_ali \
data/train data/lang exp/tri4a1_nnet exp/tri4a1_nnet exp/tri4a1_denlats exp/tri4a1_mmi_a
steps/decode_nnet_cpu.sh --cmd "$decode_cmd" --nj 20 \
--transform-dir exp/tri3b/decode \
exp/tri3b/graph data/test exp/tri4a1_mmi_a/decode
(
steps/train_nnet_cpu_mmi.sh --initial-learning-rate 0.0005 \
--minibatch-size 128 --cmd "$decode_cmd" --transform-dir exp/tri3b_ali \
data/train data/lang exp/tri4a1_nnet exp/tri4a1_nnet exp/tri4a1_denlats exp/tri4a1_mmi_b
steps/decode_nnet_cpu.sh --cmd "$decode_cmd" --nj 20 \
--transform-dir exp/tri3b/decode \
exp/tri3b/graph data/test exp/tri4a1_mmi_b/decode
)&
# Get WER on training data before MMI.
steps/decode_nnet_cpu.sh --cmd "$decode_cmd" --nj 8 \
--config conf/decode.config --transform-dir exp/tri3b \
exp/tri3b/graph data/train exp/tri4a1_nnet/decode_train
# WER on tri3b as a baseline; we want to see how it compares to tri3b_mmi.
steps/decode.sh --cmd "$decode_cmd" --nj 8 \
--config conf/decode.config --transform-dir exp/tri3b \
exp/tri3b/graph data/train exp/tri3b/decode_train
steps/decode.sh --cmd "$decode_cmd" --nj 8 \
--config conf/decode.config --transform-dir exp/tri3b \
exp/tri3b/graph data/train exp/tri3b_mmi/decode_train
(
steps/train_nnet_cpu_mmi.sh --boost 0.1 --initial-learning-rate 0.0005 \
--minibatch-size 128 --cmd "$decode_cmd" --transform-dir exp/tri3b_ali \
data/train data/lang exp/tri4a1_nnet exp/tri4a1_nnet exp/tri4a1_denlats exp/tri4a1_mmi_c
steps/decode_nnet_cpu.sh --cmd "$decode_cmd" --nj 20 \
--transform-dir exp/tri3b/decode \
exp/tri3b/graph data/test exp/tri4a1_mmi_c/decode
# WER on training data
steps/decode_nnet_cpu.sh --cmd "$decode_cmd" --nj 8 \
--config conf/decode.config --transform-dir exp/tri3b \
exp/tri3b/graph data/train exp/tri4a1_mmi_c/decode_train
)&
(
steps/train_nnet_cpu_mmi.sh --E 0.5 --boost 0.1 --initial-learning-rate 0.0005 \
--minibatch-size 128 --cmd "$decode_cmd" --transform-dir exp/tri3b_ali \
data/train data/lang exp/tri4a1_nnet exp/tri4a1_nnet exp/tri4a1_denlats exp/tri4a1_mmi_d
steps/decode_nnet_cpu.sh --cmd "$decode_cmd" --nj 20 \
--transform-dir exp/tri3b/decode \
exp/tri3b/graph data/test exp/tri4a1_mmi_d/decode
# WER on training data
steps/decode_nnet_cpu.sh --cmd "$decode_cmd" --nj 8 \
--config conf/decode.config --transform-dir exp/tri3b \
exp/tri3b/graph data/train exp/tri4a1_mmi_d/decode_train
)&
(
steps/train_nnet_cpu_mmi.sh --E 0.5 --boost 0.1 --initial-learning-rate 0.001 \
--minibatch-size 128 --cmd "$decode_cmd" --transform-dir exp/tri3b_ali \
data/train data/lang exp/tri4a1_nnet exp/tri4a1_nnet exp/tri4a1_denlats exp/tri4a1_mmi_e
steps/decode_nnet_cpu.sh --cmd "$decode_cmd" --nj 20 \
--transform-dir exp/tri3b/decode \
exp/tri3b/graph data/test exp/tri4a1_mmi_e/decode
# WER on training data
steps/decode_nnet_cpu.sh --cmd "$decode_cmd" --nj 8 \
--config conf/decode.config --transform-dir exp/tri3b \
exp/tri3b/graph data/train exp/tri4a1_mmi_e/decode_train
)&
( # _e2 is as _e, but 2 epochs per EBW iter.
steps/train_nnet_cpu_mmi.sh --epochs-per-ebw-iter 2 --E 0.5 --boost 0.1 --initial-learning-rate 0.001 \
--minibatch-size 128 --cmd "$decode_cmd" --transform-dir exp/tri3b_ali \
data/train data/lang exp/tri4a1_nnet exp/tri4a1_nnet exp/tri4a1_denlats exp/tri4a1_mmi_e2
steps/decode_nnet_cpu.sh --cmd "$decode_cmd" --nj 20 \
--transform-dir exp/tri3b/decode \
exp/tri3b/graph data/test exp/tri4a1_mmi_e2/decode
# WER on training data
steps/decode_nnet_cpu.sh --cmd "$decode_cmd" --nj 8 \
--config conf/decode.config --transform-dir exp/tri3b \
exp/tri3b/graph data/train exp/tri4a1_mmi_e2/decode_train
)&
( # With E = 0.0 it was terrible. WER is 12.5%
steps/train_nnet_cpu_mmi.sh --E 0.0 --boost 0.1 --initial-learning-rate 0.001 \
--minibatch-size 128 --cmd "$decode_cmd" --transform-dir exp/tri3b_ali \
data/train data/lang exp/tri4a1_nnet exp/tri4a1_nnet exp/tri4a1_denlats exp/tri4a1_mmi_f
steps/decode_nnet_cpu.sh --cmd "$decode_cmd" --nj 20 \
--transform-dir exp/tri3b/decode \
exp/tri3b/graph data/test exp/tri4a1_mmi_f/decode
# WER on training data
steps/decode_nnet_cpu.sh --cmd "$decode_cmd" --nj 8 \
--config conf/decode.config --transform-dir exp/tri3b \
exp/tri3b/graph data/train exp/tri4a1_mmi_f/decode_train
)&
(
steps/train_nnet_cpu_mmi.sh --E 0.25 --boost 0.1 --initial-learning-rate 0.001 \
--minibatch-size 128 --cmd "$decode_cmd" --transform-dir exp/tri3b_ali \
data/train data/lang exp/tri4a1_nnet exp/tri4a1_nnet exp/tri4a1_denlats exp/tri4a1_mmi_g
steps/decode_nnet_cpu.sh --cmd "$decode_cmd" --nj 20 \
--transform-dir exp/tri3b/decode \
exp/tri3b/graph data/test exp/tri4a1_mmi_g/decode
# WER on training data
steps/decode_nnet_cpu.sh --cmd "$decode_cmd" --nj 8 \
--config conf/decode.config --transform-dir exp/tri3b \
exp/tri3b/graph data/train exp/tri4a1_mmi_g/decode_train
)&
#!/bin/bash
# This example runs on top of "raw-fMLLR" features:
local/nnet2/run_4a.sh
# This one is on top of filter-bank features, with only CMN.
local/nnet2/run_4b.sh
# This one is on top of 40-dim + fMLLR features
local/nnet2/run_4c.sh
@@ -194,4 +194,4 @@ local/run_sgmm2.sh
# The following script depends on local/run_raw_fmllr.sh having been run.
#
# local/run_nnet_cpu.sh
# local/run_nnet2.sh
#!/bin/bash
# This is neural net training on top of adapted 40-dimensional features.
#
. ./cmd.sh
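# Train a 4-hidden-layer, 1024-dim tanh network on top of the SAT (tri4b)
# features, then decode dev93 and eval92 with the big-dictionary trigram graph.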
(
steps/nnet2/train_tanh.sh \
--mix-up 8000 \
--initial-learning-rate 0.01 --final-learning-rate 0.001 \
--num-hidden-layers 4 --hidden-layer-dim 1024 \
--cmd "$decode_cmd" \
data/train_si284 data/lang exp/tri4b_ali_si284 exp/nnet5c || exit 1
steps/decode_nnet_cpu.sh --cmd "$decode_cmd" --nj 10 \
--transform-dir exp/tri4b/decode_bd_tgpr_dev93 \
exp/tri4b/graph_bd_tgpr data/test_dev93 exp/nnet5c/decode_bd_tgpr_dev93
steps/decode_nnet_cpu.sh --cmd "$decode_cmd" --nj 8 \
--transform-dir exp/tri4b/decode_bd_tgpr_eval92 \
exp/tri4b/graph_bd_tgpr data/test_eval92 exp/nnet5c/decode_bd_tgpr_eval92
)
@@ -2,33 +2,8 @@
. ./cmd.sh
( # I'm using basically the same setup as for Switchboard 100 hours,
# but slightly fewer parameters (8M -> 7M) as we have slightly less
# data (81 hours).
steps/train_nnet_cpu.sh \
--mix-up 8000 \
--initial-learning-rate 0.01 --final-learning-rate 0.001 \
--num-jobs-nnet 16 --num-hidden-layers 4 \
--num-parameters 7000000 \
--cmd "$decode_cmd" \
data/train_si284 data/lang exp/tri4b_ali_si284 exp/nnet5c1 || exit 1
steps/decode_nnet_cpu.sh --cmd "$decode_cmd" --nj 10 \
--transform-dir exp/tri4b/decode_bd_tgpr_dev93 \
exp/tri4b/graph_bd_tgpr data/test_dev93 exp/nnet5c1/decode_bd_tgpr_dev93
steps/decode_nnet_cpu.sh --cmd "$decode_cmd" --nj 8 \
--transform-dir exp/tri4b/decode_bd_tgpr_eval92 \
exp/tri4b/graph_bd_tgpr data/test_eval92 exp/nnet5c1/decode_bd_tgpr_eval92
)
# ...
local/nnet2/run_5c.sh
# (
# steps/train_nnet_cpu_mmi.sh --boost 0.1 --initial-learning-rate 0.001 \
# --minibatch-size 128 --cmd "$decode_cmd" --transform-dir exp/tri4b_ali_si284 \
# data/train data/lang exp/tri5c1_nnet exp/tri5c1_nnet exp/tri5c1_denlats exp/tri5c1_mmi_a
# steps/decode_nnet_cpu.sh --cmd "$decode_cmd" --nj 20 \
# --transform-dir exp/tri3b/decode \
# exp/tri3b/graph data/test exp/tri5c1_mmi_a/decode
# )&
@@ -24,6 +24,7 @@ stage=0
io_opts="-tc 5" # for jobs with a lot of I/O, limits the number running at one time.
splice_width=4 # meaning +- 4 frames on each side for second LDA
spk_vecs_dir=
random_copy=false
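# random_copy is passed to nnet-copy-egs as --random; if true, examples are
# distributed to the output archives randomly rather than round-robin.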
echo "$0 $@" # Print the command line for logging
@@ -239,17 +240,19 @@ if [ $stage -le 4 ]; then
echo "Since iters-per-epoch == 1, just concatenating the data."
for n in `seq 1 $num_jobs_nnet`; do
cat $dir/egs/egs_orig.$n.*.ark > $dir/egs/egs_tmp.$n.0.ark || exit 1;
rm $dir/egs/egs_orig.$n.*.ark || exit 1;
rm $dir/egs/egs_orig.$n.*.ark # don't "|| exit 1", due to NFS bugs...
done
else # We'll have to split it up using nnet-copy-egs.
egs_list=
for n in `seq 0 $[$iters_per_epoch-1]`; do
egs_list="$egs_list ark:$dir/egs/egs_tmp.JOB.$n.ark"
done
# note, the "|| true" below is a workaround for NFS bugs
# we encountered running this script with Debian-7, NFS-v4.
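# (The quoted '(' ... ')' and '&&'/'||' are passed through $cmd so the cleanup
# runs inside the job itself, and a failed rm will not fail the job.)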
$cmd $io_opts JOB=1:$num_jobs_nnet $dir/log/split_egs.JOB.log \
nnet-copy-egs --random=$random_copy --srand=JOB \
"ark:cat $dir/egs/egs_orig.JOB.*.ark|" $egs_list '&&' \
rm $dir/egs/egs_orig.JOB.*.ark || exit 1;
'(' rm $dir/egs/egs_orig.JOB.*.ark '||' true ')' || exit 1;
fi
fi