Commit ef9a515d authored by Dan Povey
Browse files

Minor changes to scripts, mainly RE mixing-up commands.

git-svn-id: https://svn.code.sf.net/p/kaldi/code/trunk@636 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
parent 6b07831c
......@@ -120,6 +120,15 @@ scripts/mkgraph.sh data/lang_test exp/tri5a exp/tri5a/graph
scripts/decode.sh -l data/lang_test --num-jobs 30 --cmd "$decode_cmd" \
steps/decode_lda_mllt_sat.sh exp/tri5a/graph data/eval2000 exp/tri5a/decode_eval2000
( # Try mixing up from the 5a system to see if more Gaussians helps.
# The parentheses run this optional experiment in a subshell, keeping it
# visually and functionally grouped apart from the main recipe.
# Mix up the exp/tri5a system into exp/tri5a_175k using data/train_nodup.
# NOTE(review): 175000 is presumably the target total number of Gaussians and
# exp/tri4a_ali_all_nodup the alignment directory -- confirm against
# steps/mixup_lda_etc.sh.
steps/mixup_lda_etc.sh --num-jobs 30 --cmd "$train_cmd" \
175000 data/train_nodup exp/tri5a exp/tri4a_ali_all_nodup exp/tri5a_175k
# Decode data/eval2000 into exp/tri5a_175k/decode_eval2000, reusing the
# exp/tri5a/graph decoding graph rather than building a new one.
scripts/decode.sh --cmd "$decode_cmd" steps/decode_lda_mllt_sat.sh exp/tri5a/graph \
data/eval2000 exp/tri5a_175k/decode_eval2000
)
# Align the 5a system; we'll train an SGMM system on top of
# LDA+MLLT+SAT, and use 5a system for 1st pass.
steps/align_lda_mllt_sat.sh --num-jobs 30 --cmd "$train_cmd" \
......@@ -167,6 +176,9 @@ scripts/decode.sh -l data/lang_test --num-jobs 30 --cmd "$decode_cmd" steps/deco
data/eval2000 exp/tri5a_mmi_b0.1/decode_eval2000 exp/tri5a/decode_eval2000
# getting results (see RESULTS file)
# For every decode directory under exp/, pull the "Mean" lines out of the
# scoring .sys files and pipe them to scripts/best_wer.sh.
for x in exp/*/decode_*; do
  # Skip anything that is not a directory; this also covers the literal,
  # unexpanded pattern that the loop sees when the glob matches nothing.
  [ -d "$x" ] || continue
  # "$x" is quoted so the directory name is never word-split or re-globbed;
  # the score_*/*.sys part is left unquoted on purpose so it still expands.
  grep Mean "$x"/score_*/*.sys | scripts/best_wer.sh
done
......@@ -260,7 +260,12 @@ scripts/decode.sh --cmd "$decode_cmd" steps/decode_sgmm_lda_etc.sh \
12500 data/train_si84 exp/sgmm4b exp/tri3b_ali_si84 exp/sgmm4b_12500
scripts/decode.sh --cmd "$decode_cmd" steps/decode_sgmm_lda_etc.sh \
exp/sgmm4b/graph_tgpr data/test_eval92 exp/sgmm4b_12500/decode_tgpr_eval92 exp/tri3b/decode_tgpr_eval92
# note: taking it up to 150k made it worse again [8.63->8.56->8.72]
# increasing phone dim but not #substates..
# Mix up exp/sgmm4b into exp/sgmm4b_50 with --increase-phone-dim 50, using
# data/train_si84 and exp/tri3b_ali_si84.
# NOTE(review): the leading 10000 is presumably the substate target passed to
# steps/mixup_sgmm_lda_etc.sh -- confirm against that script.
steps/mixup_sgmm_lda_etc.sh --num-jobs 10 --cmd "$train_cmd" --increase-phone-dim 50 \
10000 data/train_si84 exp/sgmm4b exp/tri3b_ali_si84 exp/sgmm4b_50
# Decode data/test_eval92 into exp/sgmm4b_50/decode_tgpr_eval92, reusing the
# exp/sgmm4b/graph_tgpr graph.
# NOTE(review): the trailing exp/tri3b/decode_tgpr_eval92 argument looks like
# an earlier decode directory consumed by the SGMM decode -- verify.
scripts/decode.sh --cmd "$decode_cmd" steps/decode_sgmm_lda_etc.sh \
exp/sgmm4b/graph_tgpr data/test_eval92 exp/sgmm4b_50/decode_tgpr_eval92 exp/tri3b/decode_tgpr_eval92
# Align 3b system with si284 data and num-jobs = 20; we'll train an LDA+MLLT+SAT system on si284 from this.
# This is 4c. c.f. 4b which is "quick" training.
......
......@@ -30,8 +30,9 @@ nj=4
boost=0.0
cmd=scripts/run.pl
acwt=0.1
stage=0
for x in 1 2 3; do
for x in `seq 8`; do
if [ $1 == "--num-jobs" ]; then
shift; nj=$1; shift
fi
......@@ -48,6 +49,9 @@ for x in 1 2 3; do
if [ $1 == "--acwt" ]; then
shift; acwt=$1; shift
fi
if [ $1 == "--stage" ]; then
shift; stage=$1; shift
fi
done
if [ $# != 6 ]; then
......@@ -102,6 +106,7 @@ while [ $x -lt $niters ]; do
# on all iterations, even though it shouldn't be necessary on the zeroth
# (but we want this script to work even if $srcdir doesn't contain the
# model used to generate the lattice).
if [ $stage -le $x ]; then
for n in `get_splits.pl $nj`; do
$cmd $dir/log/acc_den.$x.$n.log \
gmm-rescore-lattice $cur_mdl "${latspart[$n]}" "${featspart[$n]}" ark:- \| \
......@@ -130,7 +135,9 @@ while [ $x -lt $niters ]; do
$cmd $dir/log/update.$x.log \
gmm-est-mmi $cur_mdl $dir/num_acc.$x.acc $dir/den_acc.$x.acc $dir/$[$x+1].mdl \
|| exit 1;
else
echo "not doing this iteration because --stage=$stage"
fi
cur_mdl=$dir/$[$x+1].mdl
# Some diagnostics
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment