Commit ef9a515d authored by Dan Povey
Browse files

Minor changes to scripts, mainly RE mixing-up commands.

git-svn-id: https://svn.code.sf.net/p/kaldi/code/trunk@636 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
parent 6b07831c
...@@ -120,6 +120,15 @@ scripts/mkgraph.sh data/lang_test exp/tri5a exp/tri5a/graph ...@@ -120,6 +120,15 @@ scripts/mkgraph.sh data/lang_test exp/tri5a exp/tri5a/graph
scripts/decode.sh -l data/lang_test --num-jobs 30 --cmd "$decode_cmd" \ scripts/decode.sh -l data/lang_test --num-jobs 30 --cmd "$decode_cmd" \
steps/decode_lda_mllt_sat.sh exp/tri5a/graph data/eval2000 exp/tri5a/decode_eval2000 steps/decode_lda_mllt_sat.sh exp/tri5a/graph data/eval2000 exp/tri5a/decode_eval2000
( # Try mixing up from the 5a system to see if more Gaussians helps.
steps/mixup_lda_etc.sh --num-jobs 30 --cmd "$train_cmd" \
175000 data/train_nodup exp/tri5a exp/tri4a_ali_all_nodup exp/tri5a_175k
scripts/decode.sh --cmd "$decode_cmd" steps/decode_lda_mllt_sat.sh exp/tri5a/graph \
data/eval2000 exp/tri5a_175k/decode_eval2000
)
# Align the 5a system; we'll train an SGMM system on top of
# LDA+MLLT+SAT, and use 5a system for 1st pass.
steps/align_lda_mllt_sat.sh --num-jobs 30 --cmd "$train_cmd" \ steps/align_lda_mllt_sat.sh --num-jobs 30 --cmd "$train_cmd" \
...@@ -166,6 +175,9 @@ steps/train_lda_etc_mmi.sh --boost 0.1 --num-jobs 40 --cmd "$train_cmd" \ ...@@ -166,6 +175,9 @@ steps/train_lda_etc_mmi.sh --boost 0.1 --num-jobs 40 --cmd "$train_cmd" \
scripts/decode.sh -l data/lang_test --num-jobs 30 --cmd "$decode_cmd" steps/decode_lda_etc.sh exp/tri5a/graph \ scripts/decode.sh -l data/lang_test --num-jobs 30 --cmd "$decode_cmd" steps/decode_lda_etc.sh exp/tri5a/graph \
data/eval2000 exp/tri5a_mmi_b0.1/decode_eval2000 exp/tri5a/decode_eval2000 data/eval2000 exp/tri5a_mmi_b0.1/decode_eval2000 exp/tri5a/decode_eval2000
# getting results (see RESULTS file)
for x in exp/*/decode_*; do [ -d $x ] && grep Mean $x/score_*/*.sys | scripts/best_wer.sh; done for x in exp/*/decode_*; do [ -d $x ] && grep Mean $x/score_*/*.sys | scripts/best_wer.sh; done
......
...@@ -260,7 +260,12 @@ scripts/decode.sh --cmd "$decode_cmd" steps/decode_sgmm_lda_etc.sh \ ...@@ -260,7 +260,12 @@ scripts/decode.sh --cmd "$decode_cmd" steps/decode_sgmm_lda_etc.sh \
12500 data/train_si84 exp/sgmm4b exp/tri3b_ali_si84 exp/sgmm4b_12500 12500 data/train_si84 exp/sgmm4b exp/tri3b_ali_si84 exp/sgmm4b_12500
scripts/decode.sh --cmd "$decode_cmd" steps/decode_sgmm_lda_etc.sh \ scripts/decode.sh --cmd "$decode_cmd" steps/decode_sgmm_lda_etc.sh \
exp/sgmm4b/graph_tgpr data/test_eval92 exp/sgmm4b_12500/decode_tgpr_eval92 exp/tri3b/decode_tgpr_eval92 exp/sgmm4b/graph_tgpr data/test_eval92 exp/sgmm4b_12500/decode_tgpr_eval92 exp/tri3b/decode_tgpr_eval92
# note: taking it up to 150k made it worse again [8.63->8.56->8.72]
# increasing phone dim but not #substates..
steps/mixup_sgmm_lda_etc.sh --num-jobs 10 --cmd "$train_cmd" --increase-phone-dim 50 \
10000 data/train_si84 exp/sgmm4b exp/tri3b_ali_si84 exp/sgmm4b_50
scripts/decode.sh --cmd "$decode_cmd" steps/decode_sgmm_lda_etc.sh \
exp/sgmm4b/graph_tgpr data/test_eval92 exp/sgmm4b_50/decode_tgpr_eval92 exp/tri3b/decode_tgpr_eval92
# Align 3b system with si284 data and num-jobs = 20; we'll train an LDA+MLLT+SAT system on si284 from this.
# This is 4c. c.f. 4b which is "quick" training.
......
...@@ -30,8 +30,9 @@ nj=4 ...@@ -30,8 +30,9 @@ nj=4
boost=0.0 boost=0.0
cmd=scripts/run.pl cmd=scripts/run.pl
acwt=0.1 acwt=0.1
stage=0
for x in 1 2 3; do for x in `seq 8`; do
if [ $1 == "--num-jobs" ]; then if [ $1 == "--num-jobs" ]; then
shift; nj=$1; shift shift; nj=$1; shift
fi fi
...@@ -48,6 +49,9 @@ for x in 1 2 3; do ...@@ -48,6 +49,9 @@ for x in 1 2 3; do
if [ $1 == "--acwt" ]; then if [ $1 == "--acwt" ]; then
shift; acwt=$1; shift shift; acwt=$1; shift
fi fi
if [ $1 == "--stage" ]; then
shift; stage=$1; shift
fi
done done
if [ $# != 6 ]; then if [ $# != 6 ]; then
...@@ -102,35 +106,38 @@ while [ $x -lt $niters ]; do ...@@ -102,35 +106,38 @@ while [ $x -lt $niters ]; do
# on all iterations, even though it shouldn't be necessary on the zeroth
# (but we want this script to work even if $srcdir doesn't contain the
# model used to generate the lattice).
for n in `get_splits.pl $nj`; do if [ $stage -le $x ]; then
$cmd $dir/log/acc_den.$x.$n.log \ for n in `get_splits.pl $nj`; do
gmm-rescore-lattice $cur_mdl "${latspart[$n]}" "${featspart[$n]}" ark:- \| \ $cmd $dir/log/acc_den.$x.$n.log \
lattice-to-post --acoustic-scale=$acwt ark:- ark:- \| \ gmm-rescore-lattice $cur_mdl "${latspart[$n]}" "${featspart[$n]}" ark:- \| \
gmm-acc-stats $cur_mdl "${featspart[$n]}" ark:- $dir/den_acc.$x.$n.acc \ lattice-to-post --acoustic-scale=$acwt ark:- ark:- \| \
|| touch $dir/.error & gmm-acc-stats $cur_mdl "${featspart[$n]}" ark:- $dir/den_acc.$x.$n.acc \
done || touch $dir/.error &
wait done
[ -f $dir/.error ] && echo Error accumulating den stats on iter $x && exit 1; wait
$cmd $dir/log/den_acc_sum.$x.log \ [ -f $dir/.error ] && echo Error accumulating den stats on iter $x && exit 1;
gmm-sum-accs $dir/den_acc.$x.acc $dir/den_acc.$x.*.acc || exit 1; $cmd $dir/log/den_acc_sum.$x.log \
rm $dir/den_acc.$x.*.acc gmm-sum-accs $dir/den_acc.$x.acc $dir/den_acc.$x.*.acc || exit 1;
rm $dir/den_acc.$x.*.acc
echo "Iteration $x: getting numerator stats."
for n in `get_splits.pl $nj`; do echo "Iteration $x: getting numerator stats."
$cmd $dir/log/acc_num.$x.$n.log \ for n in `get_splits.pl $nj`; do
gmm-acc-stats-ali $cur_mdl "${featspart[$n]}" "ark:gunzip -c $alidir/$n.ali.gz|" \ $cmd $dir/log/acc_num.$x.$n.log \
$dir/num_acc.$x.$n.acc || touch $dir/.error & gmm-acc-stats-ali $cur_mdl "${featspart[$n]}" "ark:gunzip -c $alidir/$n.ali.gz|" \
done $dir/num_acc.$x.$n.acc || touch $dir/.error &
wait; done
[ -f $dir/.error ] && echo Error accumulating num stats on iter $x && exit 1; wait;
$cmd $dir/log/num_acc_sum.$x.log \ [ -f $dir/.error ] && echo Error accumulating num stats on iter $x && exit 1;
gmm-sum-accs $dir/num_acc.$x.acc $dir/num_acc.$x.*.acc || exit 1; $cmd $dir/log/num_acc_sum.$x.log \
rm $dir/num_acc.$x.*.acc gmm-sum-accs $dir/num_acc.$x.acc $dir/num_acc.$x.*.acc || exit 1;
rm $dir/num_acc.$x.*.acc
$cmd $dir/log/update.$x.log \
gmm-est-mmi $cur_mdl $dir/num_acc.$x.acc $dir/den_acc.$x.acc $dir/$[$x+1].mdl \ $cmd $dir/log/update.$x.log \
|| exit 1; gmm-est-mmi $cur_mdl $dir/num_acc.$x.acc $dir/den_acc.$x.acc $dir/$[$x+1].mdl \
|| exit 1;
else
echo "not doing this iteration because --stage=$stage"
fi
cur_mdl=$dir/$[$x+1].mdl cur_mdl=$dir/$[$x+1].mdl
# Some diagnostics # Some diagnostics
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment