Commit 697a871a authored by Dan Povey's avatar Dan Povey
Browse files

Modifications to s3 scripts; fix to s1 scripts.

git-svn-id: https://svn.code.sf.net/p/kaldi/code/trunk@445 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
parent 2f0a0bb9
......@@ -86,32 +86,30 @@ exp/decode_tri2m_vtln_diag/wer:Average WER is 3.087848 (387 / 12533) # + diagon
exp/decode_tri2m_vtln_diag_utt/wer:Average WER is 4.340541 (544 / 12533) # [per-utterance]
exp/decode_tri2m_vtln_nofmllr/wer:Average WER is 5.784728 (725 / 12533) # feature-space VTLN, with no fMLLR
# sgmma is SGMM without speaker vectors.
exp/decode_sgmma/wer:Average WER is 3.319237 (416 / 12533)
exp/decode_sgmma_fmllr/wer:Average WER is 2.934308 (289 / 9849)
exp/decode_sgmma_fmllr/wer:Average WER is 2.928269 (367 / 12533)
exp/decode_sgmma_fmllr_utt/wer:Average WER is 3.303279 (414 / 12533)
exp/decode_sgmma_fmllrbasis_utt/wer:Average WER is 3.191574 (400 / 12533)
# sgmmb is SGMM with speaker vectors.
exp/decode_sgmmb/wer:Average WER is 2.760712 (346 / 12533)
exp/decode_sgmmb_fmllr/wer:Average WER is 2.585175 (324 / 12533)
exp/decode_sgmmb_utt/wer:Average WER is 2.808585 (352 / 12533)
exp/decode_sgmmb/wer:Average WER is 2.521344 (316 / 12533)
exp/decode_sgmmb_fmllr/wer:Average WER is 2.377723 (298 / 12533)
exp/decode_sgmmb_utt/wer:Average WER is 2.728796 (342 / 12533)
# sgmmc is like sgmmb but with gender dependency
exp/decode_sgmmc/wer:Average WER is 2.696880 (338 / 12533)
exp/decode_sgmmc_fmllr/wer:Average WER is 2.457512 (308 / 12533)
# "norm" is normalizing weights per gender..
exp/decode_sgmmc_norm/wer:Average WER is 2.696880 (338 / 12533)
exp/decode_sgmmc_fmllr_norm/wer:Average WER is 2.425596 (304 / 12533)
exp/decode_sgmmc/wer:Average WER is 2.720817 (341 / 12533)
exp/decode_sgmmc_fmllr/wer:Average WER is 2.489428 (312 / 12533)
# sgmmd is like sgmmb but with LDA+MLLT features.
exp/decode_sgmmd/wer:Average WER is 2.449533 (307 / 12533)
exp/decode_sgmmd_fmllr/wer:Average WER is 2.305912 (289 / 12533)
exp/decode_sgmmd/wer:Average WER is 2.656986 (333 / 12533)
exp/decode_sgmmd_fmllr/wer:Average WER is 2.409639 (302 / 12533)
# sgmme is like sgmmb but with LDA+ET features.
exp/decode_sgmme/wer:Average WER is 2.321870 (291 / 12533)
exp/decode_sgmme_fmllr/wer:Average WER is 2.154313 (270 / 12533)
exp/decode_sgmme/wer:Average WER is 2.337828 (293 / 12533)
exp/decode_sgmme_fmllr/wer:Average WER is 2.266018 (284 / 12533)
#### Note: stuff below this line may be out of date / not computed
......@@ -167,3 +165,4 @@ exp/decode_sgmmc_fmllr/wer:Average WER is 2.688901 (337 / 12533)
# 64-bit+ATLAS was 0.171s
# 32-bit+ATLAS was 0.205s
# 64-bit+MKL was 0.291s
......@@ -112,8 +112,8 @@ while [ $iter -lt $numiters ]; do
if echo $realign_iters | grep -w $iter >/dev/null; then
echo "Aligning data"
sgmm-align-compiled $spkvecs_opt $utt2spk_opt $scale_opts "$gselect_opt" \
--retry-beam=40 $dir/$iter.mdl "ark:gunzip -c $dir/graphs.fsts.gz|" "$feats" \
ark:$dir/cur.ali 2> $dir/align.$iter.log || exit 1;
--beam=8 --retry-beam=40 $dir/$iter.mdl "ark:gunzip -c $dir/graphs.fsts.gz|" \
"$feats" ark:$dir/cur.ali 2> $dir/align.$iter.log || exit 1;
fi
if echo $spkvec_iters | grep -w $iter >/dev/null; then
( ali-to-post ark:$dir/cur.ali ark:- | \
......
......@@ -118,8 +118,8 @@ while [ $iter -lt $numiters ]; do
if echo $realign_iters | grep -w $iter >/dev/null; then
echo "Aligning data"
sgmm-align-compiled $spkvecs_opt $utt2spk_opt $scale_opts "$gselect_opt" \
--retry-beam=40 $dir/$iter.mdl "ark:gunzip -c $dir/graphs.fsts.gz|" "$feats" \
ark:$dir/cur.ali 2> $dir/align.$iter.log || exit 1;
--beam=8 --retry-beam=40 $dir/$iter.mdl "ark:gunzip -c $dir/graphs.fsts.gz|" \
"$feats" ark:$dir/cur.ali 2> $dir/align.$iter.log || exit 1;
fi
if echo $spkvec_iters | grep -w $iter >/dev/null; then
( ali-to-post ark:$dir/cur.ali ark:- | \
......
......@@ -115,8 +115,8 @@ while [ $iter -lt $numiters ]; do
if echo $realign_iters | grep -w $iter >/dev/null; then
echo "Aligning data"
sgmm-align-compiled $spkvecs_opt $utt2spk_opt $scale_opts "$gselect_opt" \
--retry-beam=40 $dir/$iter.mdl "ark:gunzip -c $dir/graphs.fsts.gz|" "$feats" \
ark:$dir/cur.ali 2> $dir/align.$iter.log || exit 1;
--beam=8 --retry-beam=40 $dir/$iter.mdl "ark:gunzip -c $dir/graphs.fsts.gz|" \
"$feats" ark:$dir/cur.ali 2> $dir/align.$iter.log || exit 1;
fi
if echo $spkvec_iters | grep -w $iter >/dev/null; then
( ali-to-post ark:$dir/cur.ali ark:- | \
......
......@@ -3,16 +3,27 @@
script=$1
dir=$2
if [ $# -ne 2 -o ! -x $script -o ! -d $dir ]; then
echo "Usage: scripts/decode.sh <decode-script> <decode-dir>"
# Validate the command line: two or three arguments, an executable decode
# script and an existing decode directory.  On bad input print the usage and
# abort, instead of carrying on with empty or invalid paths.
if [ $# -lt 2 ] || [ $# -gt 3 ] || [ ! -x "$script" ] || [ ! -d "$dir" ]; then
  echo "Usage: scripts/decode.sh <decode-script> <decode-dir> [<old-decode-dir>]"
  echo "[check your command line arguments]"
  exit 1;
fi
scripts/mkgraph.sh data/lang_test $dir $dir/graph
for test in mar87 oct87 feb89 oct89 feb91 sep92; do
$script $dir data/test_$test data/lang $dir/decode_$test &
done
if [ $# -eq 2 ]; then # normal case: 2 args.
for test in mar87 oct87 feb89 oct89 feb91 sep92; do
$script $dir data/test_$test data/lang $dir/decode_$test &
done
else
olddir=$3
for test in mar87 oct87 feb89 oct89 feb91 sep92; do
if [ ! -d $olddir/decode_$test ]; then
echo "decode.sh: no such directory $oldir/decode_$test";
exit 1;
fi
$script $dir data/test_$test data/lang $dir/decode_$test $olddir/decode_$test &
done
fi
wait
scripts/average_wer.sh $dir/decode_?????/wer > $dir/wer
cat $dir/wer
......
......@@ -96,7 +96,26 @@ local/decode.sh steps/decode_lda_mllt_sat.sh exp/tri4d
# Next, SGMM system-- train SGMM system with speaker vectors, on top
# of LDA+MLLT features.
steps/train_sgmm_lda_mllt.sh data/train data/lang exp/tri2b_ali exp/sgmm3d
steps/train_ubm_lda_etc.sh data/train data/lang exp/tri2b_ali exp/ubm3d
steps/train_sgmm_lda_etc.sh data/train data/lang exp/tri2b_ali exp/ubm3d/final.ubm exp/sgmm3d
scripts/mkgraph.sh data/lang_test exp/sgmm3d exp/sgmm3d/graph
local/decode.sh steps/decode_sgmm_lda_etc.sh exp/sgmm3d
# Align LDA+ET system prior to training corresponding SGMM system.
steps/align_lda_et.sh --graphs "ark,s,cs:gunzip -c exp/tri2c/graphs.fsts.gz|" \
data/train data/lang exp/tri2c exp/tri2c_ali
# Train SGMM system on top of LDA+ET.
steps/train_ubm_lda_etc.sh data/train data/lang exp/tri2c_ali exp/ubm3e
steps/train_sgmm_lda_etc.sh data/train data/lang exp/tri2c_ali exp/ubm3e/final.ubm exp/sgmm3e
local/decode.sh steps/decode_sgmm_lda_etc.sh exp/sgmm3e
# Now train SGMM system on top of LDA+MLLT+SAT
steps/train_ubm_lda_etc.sh data/train data/lang exp/tri3d_ali exp/ubm4f
steps/train_sgmm_lda_etc.sh data/train data/lang exp/tri3d_ali exp/ubm4f/final.ubm exp/sgmm4f
##### Below here is trash. ######
......
......@@ -18,4 +18,4 @@
# To be run from one directory above this script.
grep WER $* | \
awk '{n=n+$4; d=d+$6} END{ printf("Average WER is %f (%d / %d) \n", (100.0*n)/d, n, d); }'
awk '{e=e+$4; n=n+$6; i=i+$7; d=d+$9; s=s+$11;} END{ printf("%%WER %f [ %d / %d, %d ins, %d del, %d sub ]\n", (100.0*e)/n, e, n, i, d, s); }'
#!/bin/bash
# Copyright 2010-2011 Microsoft Corporation Arnab Ghoshal
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# To be run from ..
# This script does training-data alignment given a model built using CMN +
# splice-9-frames + LDA + ET features. Its output, all in
# its own experimental directory, is cmvn.ark, trans.ark, ali, tree, final.mdl,
# final.alimdl, final.occs, final.mat and final.et (the last six are just copied
# from the source directory).
# Optional leading "--graphs <rspecifier>": reuse training graphs compiled in
# an earlier phase (only meaningful if they were built with the same data).
# $graphs stays empty when the option is not given.
graphs=
case "$1" in
  --graphs)
    graphs=$2
    shift
    shift
    ;;
esac

# Exactly four positional arguments must remain after option parsing.
if [ $# -ne 4 ]; then
  echo "Usage: steps/align_lda_et.sh <data-dir> <lang-dir> <src-dir> <exp-dir>"
  echo " e.g.: steps/align_lda_et.sh data/train data/lang exp/tri2c exp/tri2c_ali"
  exit 1;
fi

# Source the local environment setup when one is present.
[ -f path.sh ] && . path.sh

data=$1      # data directory (feats.scp, text, spk2utt, utt2spk are read from it)
lang=$2      # language directory (words.txt, L.fst, silphones.csl are read from it)
srcdir=$3    # directory holding the trained source model
dir=$4       # output (experiment) directory
# Verify that every model file taken from the source directory is present
# before anything is created.  final.occs is listed too, since it is copied
# into $dir together with the others.
requirements="$srcdir/final.mdl $srcdir/final.occs $srcdir/final.alimdl $srcdir/final.mat $srcdir/final.et $srcdir/tree"
for f in $requirements; do
  if [ ! -f "$f" ]; then
    echo "align_lda_et.sh: no such file $f"
    exit 1;
  fi
done
# Create the output directory and populate it with the model files from the
# source directory.
mkdir -p $dir
cp $srcdir/{final.mdl,final.occs,final.alimdl,tree,final.mat,final.et} $dir || exit 1; # Create copies in $dir
# Scaling options handed to both alignment passes below.
scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1"
echo "Computing cepstral mean and variance statistics"
# Stats are grouped by speaker via spk2utt and written to $dir/cmvn.ark;
# stderr goes to $dir/cmvn.log.
compute-cmvn-stats --spk2utt=ark:$data/spk2utt scp:$data/feats.scp \
ark:$dir/cmvn.ark 2>$dir/cmvn.log || exit 1;
# Feature pipeline without the per-speaker transform: apply-cmvn (no variance
# normalization), splice-feats, then the final.mat transform.
sifeats="ark:apply-cmvn --norm-vars=false --utt2spk=ark:$data/utt2spk ark:$dir/cmvn.ark scp:$data/feats.scp ark:- | splice-feats ark:- ark:- | transform-feats $dir/final.mat ark:- ark:- |"
# First alignment pass: all training data, using the supplied alignment
# model (final.alimdl) on the features without speaker transforms.
# The result goes to $dir/pre.ali.
echo "Aligning all training data [with alignment model]"
if [ -n "$graphs" ]; then
  # --graphs was supplied: align against the precompiled graphs directly.
  gmm-align-compiled $scale_opts --beam=8 --retry-beam=40 $dir/final.alimdl \
    "$graphs" "$sifeats" ark:$dir/pre.ali 2> $dir/align_pass1.log || exit 1;
else
  # No graphs given: write the integer form of the transcripts to a
  # temporary file and let gmm-align work from the tree and lexicon.
  scripts/sym2int.pl --ignore-first-field $lang/words.txt < $data/text > $dir/train.tra \
    || exit 1;
  gmm-align $scale_opts --beam=8 --retry-beam=40 $dir/tree $dir/final.alimdl $lang/L.fst \
    "$sifeats" ark:$dir/train.tra ark:$dir/pre.ali 2> $dir/align_pass1.log || exit 1;
  rm $dir/train.tra
fi
echo "Computing exponential transforms"
# Silence phone list, read from the language directory.  It must be
# non-empty; report the problem instead of exiting silently.
silphonelist=$(cat $lang/silphones.csl)
if [ -z "$silphonelist" ]; then
  echo "align_lda_et.sh: silence phone list $lang/silphones.csl is missing or empty"
  exit 1;
fi

# Turn the first-pass alignments into posteriors, give silence phones zero
# weight, convert to Gaussian-level posteriors with the alignment model, and
# estimate per-speaker transforms with the final model.  Transforms are
# written to $dir/trans.ark and the warp factors (text form) to $dir/warp.
( ali-to-post ark:$dir/pre.ali ark:- | \
  weight-silence-post 0.0 $silphonelist $dir/final.alimdl ark:- ark:- | \
  gmm-post-to-gpost $dir/final.alimdl "$sifeats" ark:- ark:- | \
  gmm-est-et --spk2utt=ark:$data/spk2utt $dir/final.mdl $dir/final.et \
    "$sifeats" ark,s,cs:- ark:$dir/trans.ark ark,t:$dir/warp ) \
  2>$dir/trans.log || exit 1;

# Final feature pipeline: the features above followed by the per-speaker
# transform just estimated.
feats="$sifeats transform-feats --utt2spk=ark:$data/utt2spk ark:$dir/trans.ark ark:- ark:- |"
# Second alignment pass: final model (final.mdl) on the speaker-transformed
# features.  The result goes to $dir/ali.
echo "Aligning all training data [with final model and features]"
if [ -n "$graphs" ]; then
  # --graphs was supplied: align against the precompiled graphs directly.
  gmm-align-compiled $scale_opts --beam=8 --retry-beam=40 $dir/final.mdl \
    "$graphs" "$feats" ark:$dir/ali 2> $dir/align_pass2.log || exit 1;
else
  # No graphs given: write the integer form of the transcripts to a
  # temporary file and let gmm-align work from the tree and lexicon.
  scripts/sym2int.pl --ignore-first-field $lang/words.txt < $data/text > $dir/train.tra \
    || exit 1;
  gmm-align $scale_opts --beam=8 --retry-beam=40 $dir/tree $dir/final.mdl $lang/L.fst \
    "$feats" ark:$dir/train.tra ark:$dir/ali 2> $dir/align_pass2.log || exit 1;
  rm $dir/train.tra
fi

# The first-pass alignments are no longer needed.
rm $dir/pre.ali
echo "Done."
......@@ -54,7 +54,7 @@ for f in $requirements; do
done
mkdir -p $dir
cp $srcdir/{final.mdl,final.alimdl,tree,final.mat} $dir || exit 1; # Create copies in $dir
cp $srcdir/{final.mdl,final.occs,final.alimdl,tree,final.mat} $dir || exit 1; # Create copies in $dir
scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1"
......
#!/bin/bash
# Copyright 2010-2011 Microsoft Corporation
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# Decoding script that works with a SGMM model [w/ speaker vectors]
# and cepstral mean subtraction plus splice-9-frames plus LDA+MLLT, or
# LDA+MLLT+SAT or LDA+ET features. For the last two, which
# are speaker adaptive, the script takes an extra argument
# corresponding to the previous decoding directory where we can
# find the transform trans.ark.
# This script itself does two passes of decoding.
# Four arguments are mandatory; the fifth (old decoding directory) is optional.
case $# in
  4|5)
    ;;
  *)
    echo "Usage: steps/decode_sgmm_lda_etc.sh <model-dir> <data-dir> <lang-dir> <decode-dir> [<old-decode-dir>]"
    echo " e.g.: steps/decode_sgmm_lda_etc.sh exp/sgmm3d data/test_feb89 data/lang_test exp/sgmm3d/decode_feb89"
    echo " or: steps/decode_sgmm_lda_etc.sh exp/sgmm3e data/test_feb89 data/lang_test exp/sgmm3e/decode_feb89 exp/tri2c/decode_feb89"
    exit 1;
    ;;
esac

srcdir=$1                 # model directory
data=$2                   # data directory to decode
lang=$3                   # language directory
dir=$4                    # output decoding directory
olddir=$5                 # old decoding dir where there are transforms (may be empty).
graphdir=$srcdir/graph    # decoding graph is expected under the model directory
silphonelist=$(cat $lang/silphones.csl)

mkdir -p $dir

# Source the local environment setup when one is present.
[ -f path.sh ] && . path.sh
# Check that the model files we need exist (-f means "is a regular file").
requirements="$srcdir/final.mdl $srcdir/final.alimdl $srcdir/final.mat"
for f in $requirements; do
  if [ ! -f "$f" ]; then
    echo "decode_sgmm_lda_etc.sh: input file $f does not exist";
    exit 1;
  fi
done

# The decoding graph must exist and must not be older than the model
# (-ot means "older than").
if [ ! -f "$graphdir/HCLG.fst" ] || [ "$graphdir/HCLG.fst" -ot "$srcdir/final.mdl" ]; then
  echo "Graph $graphdir/HCLG.fst does not exist or is too old."
  exit 1;
fi
# Compute CMVN stats (grouped by speaker via spk2utt).  Stop here if this
# fails, since every later stage reads $dir/cmvn.ark.
compute-cmvn-stats --spk2utt=ark:$data/spk2utt scp:$data/feats.scp ark,t:$dir/cmvn.ark \
  2>$dir/cmvn.log || exit 1;

# Base feature pipeline: apply-cmvn (no variance normalization),
# splice-feats, then the final.mat transform from the model directory.
feats="ark:apply-cmvn --norm-vars=false --utt2spk=ark:$data/utt2spk ark:$dir/cmvn.ark scp:$data/feats.scp ark:- | splice-feats ark:- ark:- | transform-feats $srcdir/final.mat ark:- ark:- |"

# If an old decoding directory was given, append its per-speaker transforms
# to the feature pipeline.
if [ -n "$olddir" ]; then # i.e. if $olddir not empty string...
  if [ ! -f "$olddir/trans.ark" ]; then
    echo decode_sgmm_lda_etc.sh: error: no such file $olddir/trans.ark
    exit 1
  fi
  feats="$feats transform-feats --utt2spk=ark:$data/utt2spk ark:$olddir/trans.ark ark:- ark:- |"
fi
# Run Gaussian selection once with the final model and store the result,
# gzipped, in $dir/gselect.gz; both decoding passes read it back through
# $gselect_opt.
sgmm-gselect $srcdir/final.mdl "$feats" "ark,t:|gzip -c > $dir/gselect.gz" \
2>$dir/gselect.log || exit 1;
gselect_opt="--gselect=ark:gunzip -c $dir/gselect.gz|"
# First pass decoding with the alignment model (final.alimdl) and no speaker
# vectors, using a smaller beam (15) than the second pass.  Writes
# pass1.tra (transcripts) and pass1.ali (alignments).
sgmm-decode-faster "$gselect_opt" --beam=15.0 --acoustic-scale=0.1 --word-symbol-table=$lang/words.txt \
$srcdir/final.alimdl $graphdir/HCLG.fst "$feats" ark,t:$dir/pass1.tra ark,t:$dir/pass1.ali \
2> $dir/decode_pass1.log || exit 1;
# First speaker-vector estimate: first-pass alignments -> posteriors with
# silence phones weighted to zero -> Gaussian-level posteriors from the
# alignment model -> sgmm-est-spkvecs-gpost with the final model.
# Output: $dir/pre_vecs.ark.
( ali-to-post ark:$dir/pass1.ali ark:- | \
weight-silence-post 0.0 $silphonelist $srcdir/final.alimdl ark:- ark:- | \
sgmm-post-to-gpost $srcdir/final.alimdl "$feats" ark:- ark:- | \
sgmm-est-spkvecs-gpost --spk2utt=ark:$data/spk2utt $srcdir/final.mdl "$feats" \
ark,s,cs:- ark:$dir/pre_vecs.ark ) \
2> $dir/vecs1.log || exit 1;
# Second speaker-vector estimate, starting from pre_vecs.ark and using the
# final model.  Output: $dir/vecs.ark.
( ali-to-post ark:$dir/pass1.ali ark:- | \
weight-silence-post 0.0 $silphonelist $srcdir/final.alimdl ark:- ark:- | \
sgmm-est-spkvecs --spk-vecs=ark:$dir/pre_vecs.ark --spk2utt=ark:$data/spk2utt \
$srcdir/final.mdl "$feats" ark,s,cs:- ark:$dir/vecs.ark ) \
2> $dir/vecs2.log || exit 1;
# Second pass decoding: final model with the estimated speaker vectors and
# a wider beam (20).  Writes pass2.tra and pass2.ali.
sgmm-decode-faster --beam=20.0 --acoustic-scale=0.1 "$gselect_opt" \
--spk-vecs=ark:$dir/vecs.ark --utt2spk=ark:$data/utt2spk \
--word-symbol-table=$lang/words.txt $srcdir/final.mdl $graphdir/HCLG.fst \
"$feats" ark,t:$dir/pass2.tra ark,t:$dir/pass2.ali \
2> $dir/decode_pass2.log || exit 1;
# In this setup there are no non-scored words, so
# scoring is simple: convert the reference text to integer form and compare
# it with the second-pass transcripts.
# the ,p option lets it score partial output without dying..
# Both stdout and stderr of compute-wer end up in $dir/wer (the ">&" redirect).
scripts/sym2int.pl --ignore-first-field $lang/words.txt $data/text | \
compute-wer --mode=present ark:- ark,p:$dir/pass2.tra >& $dir/wer
......@@ -57,9 +57,6 @@ mkdir -p $dir
feats="ark:apply-cmvn --norm-vars=false --utt2spk=ark:$data/utt2spk ark:$alidir/cmvn.ark scp:$data/feats.scp ark:- | add-deltas ark:- ark:- |"
# compute integer form of transcripts.
scripts/sym2int.pl --ignore-first-field $lang/words.txt < $data/text > $dir/train.tra \
|| exit 1;
echo "Accumulating tree stats"
......@@ -101,8 +98,9 @@ convert-ali $alidir/final.mdl $dir/1.mdl $dir/tree ark:$alidir/ali ark:$dir/cur.
# Make training graphs
echo "Compiling training graphs"
compile-train-graphs $dir/tree $dir/1.mdl $lang/L.fst ark:$dir/train.tra \
"ark:|gzip -c >$dir/graphs.fsts.gz" 2>$dir/compile_graphs.log || exit 1;
compile-train-graphs $dir/tree $dir/1.mdl $lang/L.fst \
"ark:scripts/sym2int.pl --ignore-first-field $lang/words.txt < $data/text |" \
"ark:|gzip -c >$dir/graphs.fsts.gz" 2>$dir/compile_graphs.log || exit 1;
x=1
while [ $x -lt $numiters ]; do
......
......@@ -57,10 +57,6 @@ mkdir -p $dir
feats="ark:apply-cmvn --norm-vars=false --utt2spk=ark:$data/utt2spk ark:$alidir/cmvn.ark scp:$data/feats.scp ark:- | splice-feats ark:- ark:- | transform-feats $dir/0.mat ark:- ark:- |"
splicedfeats="ark:apply-cmvn --norm-vars=false --utt2spk=ark:$data/utt2spk ark:$alidir/cmvn.ark scp:$data/feats.scp ark:- | splice-feats ark:- ark:- |"
# compute integer form of transcripts.
scripts/sym2int.pl --ignore-first-field $lang/words.txt < $data/text > $dir/train.tra \
|| exit 1;
echo "Accumulating LDA statistics."
( ali-to-post ark:$alidir/ali ark:- | \
......@@ -110,8 +106,9 @@ convert-ali $alidir/final.mdl $dir/1.mdl $dir/tree ark:$alidir/ali ark:$dir/cur.
# Make training graphs
echo "Compiling training graphs"
compile-train-graphs $dir/tree $dir/1.mdl $lang/L.fst ark:$dir/train.tra \
"ark:|gzip -c >$dir/graphs.fsts.gz" 2>$dir/compile_graphs.log || exit 1;
compile-train-graphs $dir/tree $dir/1.mdl $lang/L.fst \
"ark:scripts/sym2int.pl --ignore-first-field $lang/words.txt < $data/text |" \
"ark:|gzip -c >$dir/graphs.fsts.gz" 2>$dir/compile_graphs.log || exit 1;
x=1
while [ $x -lt $numiters ]; do
......
#!/bin/bash
# Copyright 2010-2011 Microsoft Corporation Arnab Ghoshal
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# Trains SGMM on top of LDA plus [something] features, where the [something]
# might be e.g. MLLT, or some kind of speaker-specific transform.
# Exactly five positional arguments are required.
if [ $# -ne 5 ]; then
  echo "Usage: steps/train_sgmm_lda_etc.sh <data-dir> <lang-dir> <ali-dir> <ubm> <exp-dir>"
  echo " e.g.: steps/train_sgmm_lda_etc.sh data/train data/lang exp/tri2b_ali exp/ubm3c/final.ubm exp/sgmm3d"
  exit 1;
fi

# Source the local environment setup when one is present.
[ -f path.sh ] && . path.sh

# This is SGMM with speaker vectors, on top of LDA+STC/MLLT features.
# To be run from ..
data=$1      # training data directory
lang=$2      # language directory
alidir=$3    # directory with alignments (and final.mat, cmvn.ark) from a previous system
ubm=$4       # UBM file used to initialize the SGMM
dir=$5       # output (experiment) directory

# Create the output directory and take over the feature transform from the
# alignment directory; give up if either step fails.
if ! mkdir -p $dir; then
  exit 1;
fi
if ! cp $alidir/final.mat $dir/final.mat; then
  exit 1;
fi
# Scaling options handed to the aligner.
scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1"

# Iteration schedule.
numiters=25                     # Total number of iterations
realign_iters="5 10 15"         # iterations on which the data is re-aligned
spkvec_iters="5 8 12 17 22"     # iterations on which speaker vectors are re-estimated
silphonelist=$(cat $lang/silphones.csl)

# Model size settings.
spkspacedim=40
numleaves=2500
numsubstates=2500               # Initial #-substates.
totsubstates=7500               # Target #-substates.
maxiterinc=15                   # Last iter to increase #substates on.
# per-iter increment for #substates
incsubstates=$(( (totsubstates - numsubstates) / maxiterinc ))

# Gaussian-selection info is read back from the gzipped archive in $dir.
gselect_opt="--gselect=ark,s,cs:gunzip -c $dir/gselect.gz|"

# Initially don't have speaker vectors, but change this after
# we estimate them.
spkvecs_opt=
randprune=0.1

mkdir -p $dir

utt2spk_opt="--utt2spk=ark:$data/utt2spk"
spk2utt_opt="--spk2utt=ark:$data/spk2utt"

# Feature pipeline: apply-cmvn (stats from the alignment directory, no
# variance normalization), splice-feats, then the final.mat transform.
feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$data/utt2spk ark:$alidir/cmvn.ark scp:$data/feats.scp ark:- | splice-feats ark:- ark:- | transform-feats $dir/final.mat ark:- ark:- |"

# When the alignment directory carries per-speaker transforms, apply them too.
if [ -f $alidir/trans.ark ]; then
  echo "Running with speaker transforms $alidir/trans.ark"
  feats="${feats} transform-feats --utt2spk=ark:$data/utt2spk ark:$alidir/trans.ark ark:- ark:- |"
fi

# The UBM is mandatory.
if [ ! -f $ubm ]; then
  echo "No UBM in $ubm"
  exit 1;
fi
# We rebuild the tree because we want a larger #states than for a normal
# GMM system (the optimum #states for SGMMs tends to be a bit higher).
# Tree statistics are only accumulated when $dir/treeacc is not there yet.
if [ ! -f $dir/treeacc ]; then
  acc-tree-stats --ci-phones=$silphonelist $alidir/final.mdl "$feats" ark:$alidir/ali \
    $dir/treeacc 2> $dir/acc.tree.log || exit 1;
fi

# List of phone ids: last field of each phones.txt line, excluding id 0.
cat $lang/phones.txt | awk '{print $NF}' | grep -v -w 0 > $dir/phones.list

# Cluster phones into questions, keep a symbolic copy for inspection, and
# compile the questions for tree building.
cluster-phones $dir/treeacc $dir/phones.list $dir/questions.txt 2> $dir/questions.log || exit 1;
scripts/int2sym.pl $lang/phones.txt < $dir/questions.txt > $dir/questions_syms.txt
compile-questions $lang/topo $dir/questions.txt $dir/questions.qst 2>$dir/compile_questions.log || exit 1;

scripts/make_roots.pl --separate $lang/phones.txt $silphonelist shared split > $dir/roots.txt 2>$dir/roots.log || exit 1;

build-tree --verbose=1 --max-leaves=$numleaves \
  $dir/treeacc $dir/roots.txt \
  $dir/questions.qst $lang/topo $dir/tree 2> $dir/train_tree.log || exit 1;

# the sgmm-init program accepts a GMM, so we just create a temporary GMM "0.gmm"
gmm-init-model --write-occs=$dir/0.occs \
  $dir/tree $dir/treeacc $lang/topo $dir/0.gmm 2> $dir/init_gmm.log || exit 1;

sgmm-init --spk-space-dim=$spkspacedim $lang/topo $dir/tree $ubm \
  $dir/0.mdl 2> $dir/init_sgmm.log || exit 1;

# Gaussian selection with the initial model, stored gzipped for reuse.
sgmm-gselect $dir/0.mdl "$feats" ark,t:- 2>$dir/gselect.log | \
  gzip -c > $dir/gselect.gz || exit 1;

# Convert the existing alignments to the new model and tree.  This must
# succeed: cur.ali is the starting alignment for the training iterations.
convert-ali $alidir/final.mdl $dir/0.mdl $dir/tree ark:$alidir/ali \
  ark:$dir/cur.ali 2>$dir/convert.log || exit 1;

# Make training graphs
echo "Compiling training graphs"
compile-train-graphs $dir/tree $dir/0.mdl $lang/L.fst \
  "ark:scripts/sym2int.pl --ignore-first-field $lang/words.txt < $data/text |" \
  "ark:|gzip -c >$dir/graphs.fsts.gz" 2>$dir/compile_graphs.log || exit 1
# Main training loop: $iter runs from 0 to $numiters-1.  Each pass optionally
# re-aligns the data and re-estimates speaker vectors, then accumulates
# statistics and writes the next model as $dir/<iter+1>.mdl.
iter=0
while [ $iter -lt $numiters ]; do
echo "Pass $iter ... "
# Re-align on the iterations listed in $realign_iters (whole-word match).
if echo $realign_iters | grep -w $iter >/dev/null; then
echo "Aligning data"
sgmm-align-compiled $spkvecs_opt $utt2spk_opt $scale_opts "$gselect_opt" \
--beam=8 --retry-beam=40 $dir/$iter.mdl \
"ark:gunzip -c $dir/graphs.fsts.gz|" "$feats" \
ark:$dir/cur.ali 2> $dir/align.$iter.log || exit 1;
fi
# Re-estimate speaker vectors on the iterations listed in $spkvec_iters.
# The vectors are written to a temporary file first and then moved into
# place; from then on $spkvecs_opt points at them.
if echo $spkvec_iters | grep -w $iter >/dev/null; then
( ali-to-post ark:$dir/cur.ali ark:- | \
weight-silence-post 0.01 $silphonelist $dir/$iter.mdl ark:- ark:- | \
sgmm-est-spkvecs $spk2utt_opt $spkvecs_opt "$gselect_opt" \
--rand-prune=$randprune $dir/$iter.mdl \
"$feats" ark,s,cs:- ark:$dir/tmp.vecs ) 2>$dir/spkvecs.$iter.log || exit 1;
mv $dir/tmp.vecs $dir/cur.vecs
spkvecs_opt="--spk-vecs=ark:$dir/cur.vecs"
fi
# Choose the update flags for this iteration.
if [ $iter -eq 0 ]; then
flags=vwcS
elif [ $[$iter%2] -eq 1 -a $iter -gt 4 ]; then # NOTE(review): this selects odd values of $iter above 4 (5, 7, 9, ...); the earlier comment here said "even iters after 4 (i.e. starting from 6)" -- confirm which is intended.
flags=vNwcS
else
flags=vMwcS
fi
# Accumulate statistics with the current model and alignments, then update
# the model (splitting substates up to the current $numsubstates target).
sgmm-acc-stats $spkvecs_opt $utt2spk_opt --update-flags=$flags "$gselect_opt" --rand-prune=$randprune --binary=false $dir/$iter.mdl "$feats" "ark:ali-to-post ark:$dir/cur.ali ark:-|" $dir/$iter.acc 2> $dir/acc.$iter.log || exit 1;
sgmm-est --update-flags=$flags --split-substates=$numsubstates --write-occs=$dir/$[$iter+1].occs $dir/$iter.mdl $dir/$iter.acc $dir/$[$iter+1].mdl 2> $dir/update.$iter.log || exit 1;
# The previous model, its accumulators and its occupancies are removed once
# the next model has been written.
rm $dir/$iter.mdl $dir/$iter.acc
rm $dir/$iter.occs
# Raise the substate target while $iter is below $maxiterinc.
if [ $iter -lt $maxiterinc ]; then
numsubstates=$[$numsubstates+$incsubstates]
fi
iter=$[$iter+1];
done
# The point of this last phase of accumulation is to get Gaussian-level
# alignments with the speaker vectors but accumulate stats without
# any speaker vectors; we re-estimate M, w, c and S to get a model
# that's compatible with not having speaker vectors.
flags=MwcS
( ali-to-post ark:$dir/cur.ali ark:- | \
  sgmm-post-to-gpost $spkvecs_opt $utt2spk_opt "$gselect_opt" \
    $dir/$iter.mdl "$feats" ark,s,cs:- ark:- | \
  sgmm-acc-stats-gpost --update-flags=$flags $dir/$iter.mdl "$feats" \
    ark,s,cs:- $dir/$iter.aliacc ) 2> $dir/acc_ali.$iter.log || exit 1;

sgmm-est --update-flags=$flags --remove-speaker-space=true $dir/$iter.mdl \
  $dir/$iter.aliacc $dir/$iter.alimdl 2>$dir/update_ali.$iter.log || exit 1;

# Point the final.* names at the last iteration's outputs.  All three old
# links are removed first (including final.alimdl, which would otherwise make
# "ln -s" fail on a rerun), and nothing is touched unless the cd succeeded.
( cd $dir || exit 1;
  rm -f final.mdl final.alimdl final.occs;
  ln -s $iter.mdl final.mdl &&
  ln -s $iter.alimdl final.alimdl &&
  ln -s $iter.occs final.occs ) || exit 1;