Commit 465305aa authored by Dan Povey's avatar Dan Povey
Browse files

Fixes and improvements to scripts for lattice-rescoring and fMLLR with SGMMs.

git-svn-id: https://svn.code.sf.net/p/kaldi/code/trunk@459 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
parent a281368f
......@@ -3,6 +3,10 @@
# This script basically calls the supplied decoding script
# once for each test set (in parallel on the same machine),
# and then averages the resulting WERs.
# The interpretation of the decode-dir-1, etc., as inputs,
# outputs and so on, depends on the decoding script you call.
# It assumes the model directory is one level up from decode-dir-1.
mono_opt=
......@@ -12,44 +16,44 @@ if [ "$1" == "--mono" ]; then
fi
script=$1
decode_dir=$2 # e.g. exp/sgmm3b/decode
dir=`dirname $decode_dir` # e.g. exp/sgmm3b
decode_dir_1=$2 # e.g. exp/sgmm3b/decode
decode_dir_2=$3
decode_dir_3=$4
dir=`dirname $decode_dir_1` # e.g. exp/sgmm3b
if [ $# -lt 2 -o $# -gt 3 ]; then
echo "Usage: scripts/decode.sh <decode-script> <decode-dir> [<old-decode-dir>]"
if [ $# -lt 2 -o $# -gt 4 ]; then
echo "Usage: scripts/decode.sh <decode-script> <decode-dir-1> [<decode-dir-2> [<decode-dir-3>] ]"
exit 1;
fi
if [ ! -x $script -o ! -d $dir ]; then
echo "scripts/decode.sh: Either no such script $script or not exebutable, or no such dir $dir"
echo "scripts/decode.sh: Either no such script $script or not executable, or no such dir $dir"
exit 1;
fi
mkdir -p $decode_dir
scripts/mkgraph.sh $mono_opt data/lang_test $dir $dir/graph
if [ $# -eq 2 ]; then # normal case: 2 args.
for test in mar87 oct87 feb89 oct89 feb91 sep92; do
$script $dir data/test_$test data/lang $decode_dir/$test &
$script $dir data/test_$test data/lang $decode_dir_1/$test &
done
elif [ $# -eq 3 ]; then
for test in mar87 oct87 feb89 oct89 feb91 sep92; do
$script $dir data/test_$test data/lang $decode_dir_1/$test $decode_dir_2/$test &
done
else
olddir=$3
for test in mar87 oct87 feb89 oct89 feb91 sep92; do
if [ ! -d $olddir/$test ]; then
echo "decode.sh: no such directory $olddir/$test";
exit 1;
fi
$script $dir data/test_$test data/lang $decode_dir/$test $olddir/$test &
$script $dir data/test_$test data/lang $decode_dir_1/$test $decode_dir_2/$test $decode_dir_3/$test &
done
fi
wait
# Average the WERs... there may be various wer files named e.g. wer, wer_10, etc.,
# so do this for each one.
for w in $decode_dir/mar87/wer*; do
for w in $decode_dir_1/mar87/wer*; do
wername=`basename $w`
scripts/average_wer.sh $decode_dir/?????/$wername > $decode_dir/$wername
scripts/average_wer.sh $decode_dir_1/?????/$wername > $decode_dir_1/$wername
done
grep WER $decode_dir/wer* || echo "Error decoding $decode_dir: no WER results found."
# Print the averaged WER lines; if there are none, name the decode directory that failed.
grep WER $decode_dir_1/wer* || echo "Error decoding $decode_dir_1: no WER results found."
......@@ -113,12 +113,17 @@ steps/train_sgmm_lda_etc.sh data/train data/lang exp/tri3d_ali exp/ubm4f/final.u
local/decode.sh steps/decode_sgmm_lda_etc.sh exp/sgmm4f/decode exp/tri3d/decode
# Decode with fMLLR
sgmm-comp-prexform exp/sgmm4f/final.{mdl,occs,fmllr_mdl}
local/decode.sh steps/decode_sgmm_lda_etc_fmllr.sh exp/sgmm4f/decode_fmllr exp/sgmm4f/decode exp/tri3d/decode
local/decode.sh steps/decode_sgmm_lda_etc_fmllr.sh exp/sgmm4f/decode_nofmllr exp/sgmm4f/decode exp/tri3d/decode
# Some system combination experiments (just compose lattices).
local/decode_combine.sh steps/decode_combine.sh exp/tri1/decode exp/tri2a/decode exp/combine_1_2a/decode
local/decode_combine.sh steps/decode_combine.sh exp/sgmm4f/decode/ exp/tri3d/decode exp/combine_sgmm4f_tri3d/decode
for x in exp/*/decode; do grep WER $x/wer_* | scripts/best_wer.sh; done
for x in exp/*/decode*; do grep WER $x/wer_* | scripts/best_wer.sh; done
exp/combine_1_2a/decode/wer_7:%WER 3.399027 [ 426 / 12533, 55 ins, 94 del, 277 sub ]
exp/combine_sgmm4f_tri3d/decode/wer_5:%WER 1.731429 [ 217 / 12533, 30 ins, 43 del, 144 sub ]
......
......@@ -57,7 +57,7 @@ feats="ark:compute-cmvn-stats --spk2utt=ark:$data/spk2utt scp:$data/feats.scp ar
# If you decode with a beam of 20 on an LVCSR setup it will be very slow.
gmm-latgen-simple --beam=20.0 --acoustic-scale=0.1 --word-symbol-table=$lang/words.txt \
$srcdir/final.mdl $graphdir/HCLG.fst "$feats" "ark,t:|gzip -c > $dir/lat.gz" \
$srcdir/final.mdl $graphdir/HCLG.fst "$feats" "ark:|gzip -c > $dir/lat.gz" \
ark,t:$dir/test.tra ark,t:$dir/test.ali \
2> $dir/decode.log || exit 1;
......
......@@ -78,7 +78,7 @@ feats="ark:apply-cmvn --norm-vars=false --utt2spk=ark:$data/utt2spk ark:$dir/cmv
# Second pass decoding... generate lattices and rescore with
# various scales.
gmm-latgen-simple --beam=20.0 --acoustic-scale=0.1 --word-symbol-table=$lang/words.txt \
$srcdir/final.mdl $graphdir/HCLG.fst "$feats" "ark,t:|gzip -c > $dir/lat.gz" \
$srcdir/final.mdl $graphdir/HCLG.fst "$feats" "ark:|gzip -c > $dir/lat.gz" \
ark,t:$dir/pass2.tra ark,t:$dir/pass2.ali 2> $dir/decode_pass2.log || exit 1;
......
......@@ -57,7 +57,7 @@ feats="ark:compute-cmvn-stats --spk2utt=ark:$data/spk2utt scp:$data/feats.scp ar
# If you decode with a beam of 20 on an LVCSR setup it will be very slow.
gmm-latgen-simple --beam=20.0 --acoustic-scale=0.1 --word-symbol-table=$lang/words.txt \
$srcdir/final.mdl $graphdir/HCLG.fst "$feats" "ark,t:|gzip -c > $dir/lat.gz" \
$srcdir/final.mdl $graphdir/HCLG.fst "$feats" "ark:|gzip -c > $dir/lat.gz" \
ark,t:$dir/test.tra ark,t:$dir/test.ali \
2> $dir/decode.log || exit 1;
......
......@@ -73,7 +73,7 @@ if [ ! -z $olddir ]; then # i.e. if $olddir not empty string...
feats="$feats transform-feats --utt2spk=ark:$data/utt2spk ark:$olddir/trans.ark ark:- ark:- |"
fi
sgmm-gselect $srcdir/final.mdl "$feats" "ark,t:|gzip -c > $dir/gselect.gz" \
sgmm-gselect $srcdir/final.mdl "$feats" "ark:|gzip -c > $dir/gselect.gz" \
2>$dir/gselect.log || exit 1;
gselect_opt="--gselect=ark:gunzip -c $dir/gselect.gz|"
......@@ -100,7 +100,7 @@ sgmm-decode-faster "$gselect_opt" --beam=20.0 --acoustic-scale=0.1 --word-symbol
sgmm-latgen-simple --beam=20.0 --acoustic-scale=0.1 "$gselect_opt" \
--spk-vecs=ark:$dir/vecs.ark --utt2spk=ark:$data/utt2spk \
--word-symbol-table=$lang/words.txt $srcdir/final.mdl $graphdir/HCLG.fst \
"$feats" "ark,t:|gzip -c >$dir/lat.gz" ark,t:$dir/pass2.tra ark,t:$dir/pass2.ali \
"$feats" "ark:|gzip -c >$dir/lat.gz" ark,t:$dir/pass2.tra ark,t:$dir/pass2.ali \
2> $dir/decode_pass2.log || exit 1;
......
#!/bin/bash
# Copyright 2010-2011 Microsoft Corporation
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# fMLLR lattice-rescoring script that works with a SGMM model [w/ speaker vectors]
# and cepstral mean subtraction plus splice-9-frames plus LDA+MLLT, or
# LDA+MLLT+SAT or LDA+ET features.  For the last two, which
# are speaker adaptive, the script takes an extra argument
# corresponding to the previous decoding directory where we can
# find the transform trans.ark.
#
# This script does not decode from scratch.  It reads the lattices, CMVN stats,
# Gaussian selection and speaker vectors left in <old-sgmm-decode-dir> by a
# previous SGMM decoding run, estimates fMLLR transforms from those lattices,
# acoustically rescores the lattices with the transformed features, and then
# computes the WER at several acoustic scales.
#
# Outputs written to <decode-dir>: trans.ark, lat.gz, <N>.tra, wer_<N>, and logs.

if [ $# != 5 -a $# != 6 ]; then
  echo "Usage: steps/decode_sgmm_lda_etc_fmllr.sh <model-dir> <data-dir> <lang-dir> <decode-dir> <old-sgmm-decode-dir> [<old-decode-dir-for-transforms>]"
  echo " e.g.: steps/decode_sgmm_lda_etc_fmllr.sh exp/sgmm3d data/test_feb89 data/lang_test exp/sgmm3d/decode_fmllr/feb89 exp/sgmm3d/decode/feb89"
  echo " or: steps/decode_sgmm_lda_etc_fmllr.sh exp/sgmm3e data/test_feb89 data/lang_test exp/sgmm3e/decode_fmllr/feb89 exp/sgmm3e/decode/feb89 exp/tri2c/decode/feb89"
  exit 1;
fi

srcdir=$1
data=$2
lang=$3
dir=$4           # output directory for this rescoring pass
firstpassdir=$5  # directory of the earlier SGMM decoding run (inputs are read from here)
olddir=$6        # old decoding dir where there are transforms [possibly]

graphdir=$srcdir/graph

mkdir -p $dir

if [ -f path.sh ]; then . path.sh; fi

# Check all inputs up front, so that a missing file gives a clear message
# instead of a failure deep inside one of the pipelines below.
# -f means file exists.
requirements="$srcdir/final.mdl $srcdir/final.fmllr_mdl $srcdir/final.mat $lang/silphones.csl $firstpassdir/cmvn.ark $firstpassdir/lat.gz $firstpassdir/gselect.gz $firstpassdir/vecs.ark"
for f in $requirements; do
  if [ ! -f "$f" ]; then
    echo "decode_sgmm_lda_etc_fmllr.sh: input file $f does not exist";
    exit 1;
  fi
done

# Read the silence phone list only after we know the file is there.
silphonelist=`cat $lang/silphones.csl`

# Base features: CMVN (stats from the first pass) -> splice -> LDA-type transform.
feats="ark:apply-cmvn --norm-vars=false --utt2spk=ark:$data/utt2spk ark:$firstpassdir/cmvn.ark scp:$data/feats.scp ark:- | splice-feats ark:- ark:- | transform-feats $srcdir/final.mat ark:- ark:- |"

if [ -n "$olddir" ]; then # i.e. if $olddir not empty string...
  if [ ! -f "$olddir/trans.ark" ]; then
    echo "decode_sgmm_lda_etc_fmllr.sh: error: no such file $olddir/trans.ark"
    exit 1
  fi
  # Apply the per-speaker transforms from the older decoding directory.
  feats="$feats transform-feats --utt2spk=ark:$data/utt2spk ark:$olddir/trans.ark ark:- ark:- |"
fi

# Re-use the Gaussian selection info computed in the first pass.
gselect_opt="--gselect=ark:gunzip -c $firstpassdir/gselect.gz|"

# Here we estimate the fMLLR transforms-- just one iteration should be sufficient,
# as it's after many adaptation passes.
# Posteriors come from the first-pass lattices, with silence weighted to zero.
( lattice-to-post --acoustic-scale=0.1 "ark:gunzip -c $firstpassdir/lat.gz|" ark:- | \
  weight-silence-post 0.0 $silphonelist $srcdir/final.mdl ark:- ark:- | \
  sgmm-est-fmllr --fmllr-iters=10 --fmllr-min-count=1000 "$gselect_opt" \
    --spk-vecs=ark:$firstpassdir/vecs.ark --spk2utt=ark:$data/spk2utt $srcdir/final.fmllr_mdl \
    "$feats" ark,s,cs:- ark:$dir/trans.ark ) 2>$dir/est_fmllr.log || exit 1;

# Append the newly estimated fMLLR transforms to the feature pipeline.
feats="$feats transform-feats --utt2spk=ark:$data/utt2spk ark:$dir/trans.ark ark:- ark:- |"

# Replace the acoustic scores in the first-pass lattices using the fMLLR-adapted features.
sgmm-rescore-lattice "$gselect_opt" --spk-vecs=ark:$firstpassdir/vecs.ark \
  --utt2spk=ark:$data/utt2spk $srcdir/final.mdl \
  "ark:gunzip -c $firstpassdir/lat.gz|" "$feats" "ark:|gzip -c >$dir/lat.gz" \
  2>$dir/acoustic_rescore.log || exit 1;

# Now rescore lattices with various acoustic scales, and compute the WER.
for inv_acwt in 4 5 6 7 8 9 10; do
  acwt=`perl -e "print (1.0/$inv_acwt);"`
  # Stop if the best path cannot be extracted; otherwise the WER below would be
  # computed against a missing or stale transcript file.
  lattice-best-path --acoustic-scale=$acwt --word-symbol-table=$lang/words.txt \
    "ark:gunzip -c $dir/lat.gz|" ark,t:$dir/${inv_acwt}.tra \
    2>$dir/rescore_${inv_acwt}.log || exit 1;

  scripts/sym2int.pl --ignore-first-field $lang/words.txt $data/text | \
    compute-wer --mode=present ark:- ark,p:$dir/${inv_acwt}.tra \
    >& $dir/wer_${inv_acwt}
done
// gmmbin/gmm-resocre-lattice.cc
// gmmbin/gmm-rescore-lattice.cc
// Copyright 2009-2011 Saarland University
// Author: Arnab Ghoshal
......@@ -70,6 +70,24 @@ void LatticeAcousticRescore(const AmDiagGmm& am,
}
}
}
// Now make sure that epsilon-input arcs and final-probs don't have
// any acoustic part in the weights.
for (int32 s = 0; s < lat->NumStates(); s++) {
for (fst::MutableArcIterator<Lattice> aiter(lat, s); !aiter.Done();
aiter.Next()) {
LatticeArc arc = aiter.Value();
int32 trans_id = arc.ilabel;
if (trans_id == 0) {
arc.weight.SetValue2(0); // make sure acoustic part of weight is zero.
aiter.SetValue(arc);
}
}
LatticeWeight w = lat->Final(s);
if (w != LatticeWeight::Zero()) {
w.SetValue2(0); // make sure acoustic part of weight is zero.
lat->SetFinal(s, w);
}
}
}
} // namespace kaldi
......
......@@ -85,6 +85,26 @@ void LatticeAcousticRescore(const AmSgmm& am,
}
}
}
// Now make sure that epsilon-input arcs and final-probs don't have
// any acoustic part in the weights. We didn't do this as part of the
// previous loop as it skipped over final-states, and these also may
// have epsilon arcs out.
for (int32 s = 0; s < lat->NumStates(); s++) {
for (fst::MutableArcIterator<Lattice> aiter(lat, s); !aiter.Done();
aiter.Next()) {
LatticeArc arc = aiter.Value();
int32 trans_id = arc.ilabel;
if (trans_id == 0) {
arc.weight.SetValue2(0); // make sure acoustic part of weight is zero.
aiter.SetValue(arc);
}
}
LatticeWeight w = lat->Final(s);
if (w != LatticeWeight::Zero()) {
w.SetValue2(0); // make sure acoustic part of weight is zero.
lat->SetFinal(s, w);
}
}
}
} // namespace kaldi
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment