Commit c339914e authored by Dan Povey's avatar Dan Povey
Browse files

Merging some changes into sandbox [I think]

git-svn-id: https://svn.code.sf.net/p/kaldi/code/sandbox/discrim@502 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
parents e8150f64 d4371dc4
...@@ -52,7 +52,7 @@ fi ...@@ -52,7 +52,7 @@ fi
# CMVN stats-- we make them part of a pipe. # CMVN stats-- we make them part of a pipe.
feats="ark:compute-cmvn-stats --spk2utt=ark:$data/spk2utt scp:$data/feats.scp ark:- | apply-cmvn --norm-vars=false --utt2spk=ark:$data/utt2spk ark:- scp:$data/feats.scp ark:- | add-deltas ark:- ark:- |" feats="ark:compute-cmvn-stats --spk2utt=ark:$data/spk2utt scp:$data/feats.scp ark:- | apply-cmvn --norm-vars=false --utt2spk=ark:$data/utt2spk ark:- scp:$data/feats.scp ark:- | add-deltas ark:- ark:- |"
# For Resource Management, we use beam of 25 and acwt of 1/10. # For Resource Management, we use beam of 20 and acwt of 1/10.
# More normal, LVCSR setups would have a beam of 13 and acwt of 1/15 or so. # More normal, LVCSR setups would have a beam of 13 and acwt of 1/15 or so.
# If you decode with a beam of 20 on an LVCSR setup it will be very slow. # If you decode with a beam of 20 on an LVCSR setup it will be very slow.
......
...@@ -91,7 +91,7 @@ while [ $x -lt $numiters ]; do ...@@ -91,7 +91,7 @@ while [ $x -lt $numiters ]; do
fi fi
gmm-acc-stats-ali --binary=false $dir/$x.mdl "$feats" ark:$dir/cur.ali $dir/$x.acc 2> $dir/acc.$x.log || exit 1; gmm-acc-stats-ali --binary=false $dir/$x.mdl "$feats" ark:$dir/cur.ali $dir/$x.acc 2> $dir/acc.$x.log || exit 1;
gmm-est --write-occs=$dir/$[$x+1].occs --mix-up=$numgauss $dir/$x.mdl $dir/$x.acc $dir/$[$x+1].mdl 2> $dir/update.$x.log || exit 1; gmm-est --write-occs=$dir/$[$x+1].occs --mix-up=$numgauss $dir/$x.mdl $dir/$x.acc $dir/$[$x+1].mdl 2> $dir/update.$x.log || exit 1;
rm $dir/$x.mdl $dir/$x.acc $dir/$x.occs rm $dir/$x.mdl $dir/$x.acc $dir/$x.occs 2>/dev/null
if [ $x -le $maxiterinc ]; then if [ $x -le $maxiterinc ]; then
numgauss=$[$numgauss+$incgauss]; numgauss=$[$numgauss+$incgauss];
fi fi
......
...@@ -490,3 +490,11 @@ done ...@@ -490,3 +490,11 @@ done
# cat exp/decode_tri2a_tgpr_eval92/scoring/hyp.sys # cat exp/decode_tri2a_tgpr_eval92/scoring/hyp.sys
# Notes on timing of alignment: tried it in tri2a for 500 utts
# [with retry-beam=40]; the timings are listed below.
# The results below seem to show that beam = 6 is the fastest...
# of course this assumes the retry-beam is 40.
# 20.9 sec @ beam = 7
# 13.8 sec @ beam = 6
# 14.4 sec @ beam = 5
# 14.4 sec @ beam = 4
\ No newline at end of file
...@@ -35,9 +35,10 @@ echo "Compiling LM fst" ...@@ -35,9 +35,10 @@ echo "Compiling LM fst"
grep -v '</s> <s>' | \ grep -v '</s> <s>' | \
grep -v '</s> </s>' | \ grep -v '</s> </s>' | \
arpa2fst - | fstprint | \ arpa2fst - | fstprint | \
scripts/remove_oovs.pl data/oovs_srilm.3g.kn.gz.txt | \ scripts/remove_oovs.pl data/oovs_tg.txt | \
scripts/eps2disambig.pl | fstcompile --isymbols=$WORDSYM --osymbols=$WORDSYM \ scripts/eps2disambig.pl | fstcompile --isymbols=$WORDSYM --osymbols=$WORDSYM \
--keep_isymbols=false --keep_osymbols=false > $TMP/G.fst --keep_isymbols=false --keep_osymbols=false | \
fstproject --project_output=true > $TMP/G.fst
echo "Extracting $NBEST best" echo "Extracting $NBEST best"
......
#!/bin/bash
# Copyright 2010-2011 Microsoft Corporation Arnab Ghoshal
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# sgmm3e is as sgmm3b but on splice+LDA+ET features, not delta+accel
# sgmm3b is as sgmm3a (SGMM with half SI-284), but adding speaker vectors.
# Source the local path setup when one is present in the working directory.
if [ -f path.sh ]; then
  . path.sh
fi

# Output directory of this experiment and the system it is built on.
dir=exp/sgmm3e
srcdir=exp/tri2k
mat=${srcdir}/lda.mat
ubm=exp/ubm3d/final.ubm
srcmodel=${srcdir}/final.mdl
srcalimodel=${srcdir}/final.alimdl
ldamat=${srcdir}/lda.mat
defaultmat=${srcdir}/default.mat
et=${srcdir}/final.et

# Scaling options handed to the alignment programs.
scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1"

# Iteration schedule.
numiters=35                     # Total number of iterations.
realign_iters="5 15 25"         # Realign a bit earlier than we did in tri2a,
                                # since the SGMM system is quite different
                                # from a normal triphone system.
spkvec_iters="5 8 12 17 22 32"  # Iterations on which speaker vectors are estimated.
maxiterinc=20                   # By this iter, we have all the substates.

# Model size.
numleaves=6000                  # was 2k for GMM system: increasing it for SGMM system.
numsubstates=6000               # initial #-substates
totsubstates=35000              # a little less than #Gauss for baseline GMM system (40k)
# Per-iteration increment for the number of substates (integer division).
incsubstates=$(( (totsubstates - numsubstates) / maxiterinc ))
phn_dim=50
phn_dim_iter=3                  # iter to increase phn dim.

# Silence phone list, read verbatim from the data directory.
silphonelist=$(cat data/silphones.csl)
randprune=0.1
# Set up the experiment directory with private copies of the training
# feature list (.scp) and transcriptions (.tra).
mkdir -p $dir
for kind in scp tra; do
  cp data/train.$kind $dir
done

# Restrict utt2spk to the utterances in the copied scp and derive spk2utt.
scripts/filter_scp.pl $dir/train.scp data/train.utt2spk > $dir/train.utt2spk
scripts/utt2spk_to_spk2utt.pl $dir/train.utt2spk > $dir/train.spk2utt

# Split each of the three lists into parts 1..3, passing the utt2spk map
# to the splitter every time.
for kind in scp tra utt2spk; do
  scripts/split_scp.pl --utt2spk=$dir/train.utt2spk \
    $dir/train.$kind $dir/train1.$kind $dir/train2.$kind $dir/train3.$kind
done

# Create spk2utt files; the empty suffix handles the un-split list.
for part in 1 2 3 ""; do
  scripts/utt2spk_to_spk2utt.pl $dir/train$part.utt2spk > $dir/train$part.spk2utt
done
# Feature pipelines, written as Kaldi "piped" rspecifiers.
#
#   feats               - all training data: splice -> LDA matrix -> per-speaker
#                         transforms read from the concatenated $dir/?.trans.
#   featspart[n]        - the same pipeline restricted to data split n, reading
#                         that split's own $dir/n.trans.
#   defaultfeatspart[n] - split n: splice -> $defaultmat, no speaker transform.
#   ldafeatspart[n]     - split n: splice -> $ldamat, no speaker transform.
#
# Keep feats and featspart in sync: both feed the last transform-feats from
# the previous pipe stage via "ark:-".  (featspart used to pass a bare "ark:"
# there, which differed from feats and only worked if the reader treats an
# empty filename as standard input.)
feats="ark,s,cs:splice-feats --print-args=false scp:$dir/train.scp ark:- | transform-feats $ldamat ark:- ark:- | transform-feats --utt2spk=ark:$dir/train.utt2spk \"ark:cat $dir/?.trans|\" ark:- ark:- |"
for n in 1 2 3; do
  featspart[$n]="ark,s,cs:splice-feats --print-args=false scp:$dir/train${n}.scp ark:- | transform-feats $ldamat ark:- ark:- | transform-feats --utt2spk=ark:$dir/train$n.utt2spk ark:$dir/$n.trans ark:- ark:- |"
  defaultfeatspart[$n]="ark,s,cs:splice-feats --print-args=false scp:$dir/train${n}.scp ark:- | transform-feats $defaultmat ark:- ark:- |"
  ldafeatspart[$n]="ark,s,cs:splice-feats --print-args=false scp:$dir/train${n}.scp ark:- | transform-feats $ldamat ark:- ark:- |"
done
# The UBM is a prerequisite of this recipe; stop right away without it.
[ -f $ubm ] || { echo "No UBM in $ubm"; exit 1; }

# Reuse the topology file of the source system.
cp $srcdir/topo $dir

# Compile training graphs with the source tree and model, one background job
# per data split.  A failing job drops the marker file $dir/.error.
echo "Creating training graphs for model from $srcdir"
rm -f $dir/.error
for n in 1 2 3; do
  (
    compile-train-graphs $srcdir/tree $srcmodel data/L.fst ark:$dir/train${n}.tra \
      "ark:|gzip -c > $dir/srcgraphs${n}.fsts.gz" \
      2>$dir/compile_src_graphs.${n}.log \
      || touch $dir/.error
  ) &
done
wait
if [ -f $dir/.error ]; then
  echo source-dir compile-graphs error && exit 1
fi

# Align all training data using the old alignment model, on the
# $defaultmat features, again one background job per split.
echo "Aligning all training data with old alignment model"
rm -f $dir/.error
for n in 1 2 3; do
  (
    gmm-align-compiled $scale_opts --beam=8 --retry-beam=40 $srcalimodel \
      "ark:gunzip -c $dir/srcgraphs${n}.fsts.gz|" "${defaultfeatspart[$n]}" \
      "ark:|gzip -c >$dir/pre.${n}.ali.gz" \
      2> $dir/pre_align.${n}.log \
      || touch $dir/.error
  ) &
done
wait
if [ -f $dir/.error ]; then
  echo align error RE old system && exit 1
fi
# Compute the ET transforms, one background job per data split.
# Each job turns the first-pass alignments into posteriors, gives silence
# phones weight 0.0, converts to Gaussian-level posteriors with the alignment
# model on the $defaultmat features, and runs gmm-est-et on the $ldamat
# features, writing $dir/n.trans and $dir/n.warp.
echo "Computing ET transforms"
rm -f $dir/.error  # start from a clean marker, as the other stages do
for n in 1 2 3; do
  ( ali-to-post "ark:gunzip -c $dir/pre.${n}.ali.gz|" ark:- | \
      weight-silence-post 0.0 $silphonelist $srcalimodel ark:- ark:- | \
      gmm-post-to-gpost $srcalimodel "${defaultfeatspart[$n]}" ark,o:- ark:- | \
      gmm-est-et --spk2utt=ark:$dir/train$n.spk2utt --verbose=1 $srcmodel $et \
        "${ldafeatspart[$n]}" ark,s,cs:- ark:$dir/$n.trans ark,t:$dir/$n.warp ) \
    2> $dir/et_trans.$n.log || touch $dir/.error &
done
wait
[ -f $dir/.error ] && echo error computing ET transforms && exit 1
# Remove the first-pass alignments of ALL splits.  A glob is used because
# $n is only the last loop value (3) once the loop above has finished.
rm $dir/pre.?.ali.gz
# Second alignment pass: source model on the speaker-transformed features
# (featspart), one background job per split, reusing the source graphs.
echo "Aligning all training data with old adapted model"
rm -f $dir/.error
for n in 1 2 3; do
  (
    gmm-align-compiled $scale_opts --beam=8 --retry-beam=40 $srcmodel \
      "ark:gunzip -c $dir/srcgraphs${n}.fsts.gz|" "${featspart[$n]}" \
      "ark:|gzip -c >$dir/0.${n}.ali.gz" \
      2> $dir/old_align.${n}.log \
      || touch $dir/.error
  ) &
done
wait
if [ -f $dir/.error ]; then
  echo align error RE old system && exit 1
fi

# These graphs are no longer needed.
rm $dir/srcgraphs*.fsts.gz
# Accumulate tree statistics from the source model, the full feature
# pipeline and the alignments of all three splits.
if ! acc-tree-stats --ci-phones=$silphonelist $srcmodel "$feats" \
     "ark:gunzip -c $dir/0.?.ali.gz|" $dir/treeacc 2> $dir/acc.tree.log; then
  exit 1
fi

# The next few commands are involved with making the questions
# for tree clustering.  The extra complexity vs. the RM recipe has
# to do with the desire to ask questions about the "real" phones
# ignoring things like stress and position-in-word, and ask questions
# separately about stress and position-in-word.
# Don't include silences as things to be clustered -> --nosil option.
scripts/make_shared_phones.sh --nosil \
  | scripts/sym2int.pl data/phones.txt > $dir/phone_sets.list
if ! cluster-phones $dir/treeacc $dir/phone_sets.list $dir/questions.txt \
     2> $dir/cluster_phones.log; then
  exit 1
fi

# Convert the clustered questions to symbols, append the extra questions,
# and convert everything back to integer ids.
scripts/int2sym.pl data/phones.txt < $dir/questions.txt > $dir/questions_syms.txt
scripts/make_extra_questions.sh \
  | cat $dir/questions_syms.txt - > $dir/questions_syms_all.txt
scripts/sym2int.pl data/phones.txt < $dir/questions_syms_all.txt > $dir/questions_all.txt
if ! compile-questions $dir/topo $dir/questions_all.txt $dir/questions.qst \
     2>$dir/compile_questions.log; then
  exit 1
fi

# Tree roots, then the tree itself with at most $numleaves leaves.
scripts/make_roots.sh > $dir/roots_syms.txt
scripts/sym2int.pl --ignore-oov data/phones.txt < $dir/roots_syms.txt > $dir/roots.txt
if ! build-tree --verbose=1 --max-leaves=$numleaves \
     $dir/treeacc $dir/roots.txt \
     $dir/questions.qst $dir/topo $dir/tree 2> $dir/train_tree.log; then
  exit 1
fi

# the sgmm-init program accepts a GMM, so we just create a temporary GMM "0.gmm"
if ! gmm-init-model --write-occs=$dir/0.occs \
     $dir/tree $dir/treeacc $dir/topo $dir/0.gmm 2> $dir/init_gmm.log; then
  exit 1
fi
if ! sgmm-init --spk-space-dim=40 $dir/topo $dir/tree $ubm $dir/0.mdl \
     2> $dir/init_sgmm.log; then
  exit 1
fi

# The temporary GMM and the tree statistics are not needed any more.
rm $dir/0.gmm
rm $dir/treeacc
# Run sgmm-gselect with the initial SGMM on each data split and store its
# text output gzipped in $dir/gselectN.gz, one background job per split.
#
# Each job runs in a subshell with pipefail enabled: without it the exit
# status of "sgmm-gselect | gzip" is gzip's alone, so a failing sgmm-gselect
# would never create $dir/.error and the script would carry on with an
# empty or truncated gselect file.
rm -f $dir/.error
for n in 1 2 3; do
  ( set -o pipefail
    sgmm-gselect $dir/0.mdl "${featspart[$n]}" ark,t:- 2>$dir/gselect$n.log | \
      gzip -c > $dir/gselect${n}.gz ) || touch $dir/.error &
done
wait
[ -f $dir/.error ] && echo "Error in gselect phase" && exit 1;
# Convert the alignments generated from the previous model so that they can
# be used as initial alignments with the new tree and model.
# Not parallelized: mostly I/O.
for n in 1 2 3; do
  if ! convert-ali $srcmodel $dir/0.mdl $dir/tree \
       "ark:gunzip -c $dir/0.$n.ali.gz|" \
       "ark:|gzip -c > $dir/cur$n.ali.gz" \
       2>$dir/convert.$n.log; then
    exit 1
  fi
done
rm $dir/0.?.ali.gz

# Make training graphs for the new tree and initial model, one background
# job per split; a failing job leaves $dir/.error behind.
echo "Compiling training graphs"
rm -f $dir/.error
for n in 1 2 3; do
  (
    compile-train-graphs $dir/tree $dir/0.mdl data/L.fst ark:$dir/train${n}.tra \
      "ark:|gzip -c > $dir/graphs${n}.fsts.gz" \
      2>$dir/compile_graphs.${n}.log \
      || touch $dir/.error
  ) &
done
wait
if [ -f $dir/.error ]; then
  echo compile-graphs error && exit 1
fi
# Main training loop.  Iteration x reads $dir/$x.mdl and writes the model
# and occupancies for iteration x+1; it runs for x = 0 .. numiters-1.
x=0
while [ $x -lt $numiters ]; do
echo "Pass $x"
# On the iterations listed in $realign_iters, realign every data split with
# the current model (one background job per split).  spkvecs_opt[n] is
# empty until speaker vectors have been estimated for the first time.
if echo $realign_iters | grep -w $x >/dev/null; then
echo "Aligning data"
rm -f $dir/.error
for n in 1 2 3; do
sgmm-align-compiled ${spkvecs_opt[$n]} --utt2spk=ark:$dir/train$n.utt2spk \
"--gselect=ark,s,cs:gunzip -c $dir/gselect$n.gz|" \
$scale_opts --beam=8 --retry-beam=40 $dir/$x.mdl \
"ark:gunzip -c $dir/graphs${n}.fsts.gz|" "${featspart[$n]}" \
"ark:|gzip -c >$dir/cur${n}.ali.gz" 2> $dir/align.$x.$n.log \
|| touch $dir/.error &
done
wait
[ -f $dir/.error ] && echo error aligning data && exit 1
fi
# On the iterations listed in $spkvec_iters, (re-)estimate speaker vectors.
# Output goes to tmpN.vecs and is renamed to curN.vecs only on success,
# because the same job may be reading curN.vecs through spkvecs_opt[n].
# spkvecs_opt[n] is assigned after the background job has been launched,
# so the job started here still sees the previous value.
if echo $spkvec_iters | grep -w $x >/dev/null; then
echo "Computing speaker vectors"
for n in 1 2 3; do
( ali-to-post "ark:gunzip -c $dir/cur${n}.ali.gz|" ark:- | \
weight-silence-post 0.01 $silphonelist $dir/$x.mdl ark:- ark:- | \
sgmm-est-spkvecs --spk2utt=ark:$dir/train$n.spk2utt ${spkvecs_opt[$n]} \
"--gselect=ark,s,cs:gunzip -c $dir/gselect$n.gz|" \
--rand-prune=$randprune $dir/$x.mdl \
"${featspart[$n]}" ark,s,cs:- ark:$dir/tmp$n.vecs && \
mv $dir/tmp$n.vecs $dir/cur$n.vecs ) 2>$dir/spkvecs.$x.$n.log \
|| touch $dir/.error &
spkvecs_opt[$n]="--spk-vecs=ark:$dir/cur$n.vecs"
done
wait;
[ -f $dir/.error ] && echo error computing speaker vectors && exit 1
fi
# Choose the update flags for this iteration: vwcS on iteration 0,
# vNwcS on odd iterations greater than 4, vMwcS on all other iterations.
if [ $x -eq 0 ]; then
flags=vwcS
elif [ $[$x%2] -eq 1 -a $x -gt 4 ]; then # odd iters after 4 (i.e. starting from 5)... NOTE(review): this comment used to say "even iters after 4 (i.e. starting from 6)", which does not match the test; confirm which was intended.
flags=vNwcS
else
flags=vMwcS
fi
# Accumulate statistics for each split in parallel, using the current
# alignments; each job writes its own $dir/$x.N.acc.
for n in 1 2 3; do
sgmm-acc-stats-ali ${spkvecs_opt[$n]} --utt2spk=ark:$dir/train$n.utt2spk \
--update-flags=$flags "--gselect=ark,s,cs:gunzip -c $dir/gselect$n.gz|" \
--rand-prune=$randprune --binary=true $dir/$x.mdl "${featspart[$n]}" \
"ark:gunzip -c $dir/cur$n.ali.gz|" $dir/$x.$n.acc 2> $dir/acc.$x.$n.log \
|| touch $dir/.error &
done
wait;
[ -f $dir/.error ] && echo error accumulating stats on iter $x && exit 1
# Only on iteration $phn_dim_iter, ask sgmm-est to increase the phone-space
# dimension to $phn_dim; otherwise pass no such option.
if [ $x == $phn_dim_iter ]; then
phn_dim_opt=--increase-phn-dim=$phn_dim
else
phn_dim_opt=
fi
# Update the model from the summed per-split accumulators, splitting
# substates up to the current $numsubstates target.
sgmm-est $phn_dim_opt --update-flags=$flags --split-substates=$numsubstates \
--write-occs=$dir/$[$x+1].occs $dir/$x.mdl \
"sgmm-sum-accs - $dir/$x.?.acc|" $dir/$[$x+1].mdl 2> $dir/update.$x.log || exit 1;
# Drop this iteration's model, accumulators and occupancies; errors from
# rm (e.g. a file that does not exist) are silenced.
rm $dir/$x.mdl $dir/$x.?.acc $dir/$x.occs 2>/dev/null
# Raise the substate target by $incsubstates on iterations before $maxiterinc.
if [ $x -lt $maxiterinc ]; then
numsubstates=$[$numsubstates+$incsubstates]
fi
x=$[$x+1];
done
# Point final.mdl / final.occs at the model of the last iteration.
(
  cd $dir
  rm final.mdl final.occs 2>/dev/null
  ln -s $x.mdl final.mdl
  ln -s $x.occs final.occs
)

# Create "alignment model": accumulate statistics from Gaussian-level
# posteriors computed with the final model (one background job per split),
# then re-estimate with the speaker space removed.
flags=MwcS
for n in 1 2 3; do
  (
    ali-to-post "ark:gunzip -c $dir/cur$n.ali.gz|" ark:- |
      sgmm-post-to-gpost ${spkvecs_opt[$n]} --utt2spk=ark:$dir/train$n.utt2spk \
        "--gselect=ark,s,cs:gunzip -c $dir/gselect$n.gz|" \
        $dir/$x.mdl "${featspart[$n]}" ark,s,cs:- ark:- |
      sgmm-acc-stats-gpost --update-flags=$flags $dir/$x.mdl "${featspart[$n]}" \
        ark,s,cs:- $dir/$x.$n.aliacc
  ) 2> $dir/acc_ali.$x.$n.log || touch $dir/.error &
done
wait
if [ -f $dir/.error ]; then
  echo error accumulating stats for alignment model && exit 1
fi

if ! sgmm-est --update-flags=$flags --remove-speaker-space=true $dir/$x.mdl \
     "sgmm-sum-accs - $dir/$x.?.aliacc|" $dir/$x.alimdl \
     2>$dir/update_ali.$x.log; then
  exit 1
fi
rm $dir/$x.?.aliacc

# Point final.alimdl at the alignment model just written.
(
  cd $dir
  rm final.alimdl 2>/dev/null
  ln -s $x.alimdl final.alimdl
)
#!/bin/bash
# Copyright 2010-2011 Microsoft Corporation Arnab Ghoshal
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# sgmm3f is descended from sgmm3c (gender-dependent UBM) and sgmm3e
# (exponential-transform features).
# Source the local path setup when one is present in the working directory.
if [ -f path.sh ]; then
  . path.sh
fi

# Output directory of this experiment and the system it is built on.
dir=exp/sgmm3f
srcdir=exp/tri2k
mat=${srcdir}/lda.mat
ubm=exp/ubm3f/final.ubm  # about 800 UBM comps (400 each for male and female).
preselectmap=exp/ubm3f/preselect.map
srcmodel=${srcdir}/final.mdl
srcalimodel=${srcdir}/final.alimdl
ldamat=${srcdir}/lda.mat
defaultmat=${srcdir}/default.mat
et=${srcdir}/final.et

# Scaling options handed to the alignment programs.
scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1"

# Iteration schedule.
numiters=35                     # Total number of iterations.
realign_iters="5 15 25"         # Realign a bit earlier than we did in tri2a,
                                # since the SGMM system is quite different
                                # from a normal triphone system.
spkvec_iters="5 8 12 17 22 32"  # Iterations on which speaker vectors are estimated.
maxiterinc=20                   # By this iter, we have all the substates.

# Model size.
numleaves=6000                  # was 4.2k for GMM system: increasing it for SGMM system.
numsubstates=6000               # initial #-substates
totsubstates=35000              # a little less than #Gauss for baseline GMM system (40k)
# Per-iteration increment for the number of substates (integer division).
incsubstates=$(( (totsubstates - numsubstates) / maxiterinc ))
phn_dim=50
phn_dim_iter=3                  # iter to increase phn dim.

# Silence phone list, read verbatim from the data directory.
silphonelist=$(cat data/silphones.csl)
randprune=0.1
# Set up the experiment directory with private copies of the training
# feature list (.scp) and transcriptions (.tra).
mkdir -p $dir
for kind in scp tra; do
  cp data/train.$kind $dir
done

# Restrict utt2spk to the utterances in the copied scp and derive spk2utt.
scripts/filter_scp.pl $dir/train.scp data/train.utt2spk > $dir/train.utt2spk
scripts/utt2spk_to_spk2utt.pl $dir/train.utt2spk > $dir/train.spk2utt

# Create the preselect files: compose utt2spk with the spk2gender map and
# then with $preselectmap, split the result into three parts and gzip them.
scripts/compose_maps.pl $dir/train.utt2spk data/spk2gender.map \
  | scripts/compose_maps.pl - $preselectmap \
  | scripts/split_scp.pl --utt2spk=$dir/train.utt2spk - \
      $dir/preselect.1 $dir/preselect.2 $dir/preselect.3
gzip -f $dir/preselect.1 $dir/preselect.2 $dir/preselect.3
# done creating preselect file.

# Split each of the three lists into parts 1..3, passing the utt2spk map
# to the splitter every time.
for kind in scp tra utt2spk; do
  scripts/split_scp.pl --utt2spk=$dir/train.utt2spk \
    $dir/train.$kind $dir/train1.$kind $dir/train2.$kind $dir/train3.$kind
done

# Create spk2utt files; the empty suffix handles the un-split list.
for part in 1 2 3 ""; do
  scripts/utt2spk_to_spk2utt.pl $dir/train$part.utt2spk > $dir/train$part.spk2utt
done
# Feature pipelines, written as Kaldi "piped" rspecifiers.
#
#   feats               - all training data: splice -> LDA matrix -> per-speaker
#                         transforms read from the concatenated $dir/?.trans.
#   featspart[n]        - the same pipeline restricted to data split n, reading
#                         that split's own $dir/n.trans.
#   defaultfeatspart[n] - split n: splice -> $defaultmat, no speaker transform.
#   ldafeatspart[n]     - split n: splice -> $ldamat, no speaker transform.
#
# Keep feats and featspart in sync: both feed the last transform-feats from
# the previous pipe stage via "ark:-".  (featspart used to pass a bare "ark:"
# there, which differed from feats and only worked if the reader treats an
# empty filename as standard input.)
feats="ark,s,cs:splice-feats --print-args=false scp:$dir/train.scp ark:- | transform-feats $ldamat ark:- ark:- | transform-feats --utt2spk=ark:$dir/train.utt2spk \"ark:cat $dir/?.trans|\" ark:- ark:- |"
for n in 1 2 3; do
  featspart[$n]="ark,s,cs:splice-feats --print-args=false scp:$dir/train${n}.scp ark:- | transform-feats $ldamat ark:- ark:- | transform-feats --utt2spk=ark:$dir/train$n.utt2spk ark:$dir/$n.trans ark:- ark:- |"
  defaultfeatspart[$n]="ark,s,cs:splice-feats --print-args=false scp:$dir/train${n}.scp ark:- | transform-feats $defaultmat ark:- ark:- |"
  ldafeatspart[$n]="ark,s,cs:splice-feats --print-args=false scp:$dir/train${n}.scp ark:- | transform-feats $ldamat ark:- ark:- |"
done
# The UBM is a prerequisite of this recipe; stop right away without it.
[ -f $ubm ] || { echo "No UBM in $ubm"; exit 1; }

# Compile training graphs with the source tree and model, one background job
# per data split.  A failing job drops the marker file $dir/.error.
echo "Creating training graphs for model from $srcdir"
rm -f $dir/.error
for n in 1 2 3; do
  (
    compile-train-graphs $srcdir/tree $srcmodel data/L.fst ark:$dir/train${n}.tra \
      "ark:|gzip -c > $dir/srcgraphs${n}.fsts.gz" \
      2>$dir/compile_src_graphs.${n}.log \
      || touch $dir/.error
  ) &
done
wait
if [ -f $dir/.error ]; then
  echo source-dir compile-graphs error && exit 1
fi

# Align all training data using the old alignment model, on the
# $defaultmat features, again one background job per split.
echo "Aligning all training data with old alignment model"
rm -f $dir/.error
for n in 1 2 3; do
  (
    gmm-align-compiled $scale_opts --beam=8 --retry-beam=40 $srcalimodel \
      "ark:gunzip -c $dir/srcgraphs${n}.fsts.gz|" "${defaultfeatspart[$n]}" \
      "ark:|gzip -c >$dir/pre.${n}.ali.gz" \
      2> $dir/pre_align.${n}.log \
      || touch $dir/.error
  ) &
done
wait
if [ -f $dir/.error ]; then
  echo align error RE old system && exit 1
fi
# Compute the ET (exponential transform) transforms, one background job per
# data split.  Each job turns the first-pass alignments into posteriors,
# gives silence phones weight 0.0, converts to Gaussian-level posteriors
# with the alignment model on the $defaultmat features, and runs gmm-est-et
# on the $ldamat features, writing $dir/n.trans and $dir/n.warp.
echo "Computing ET transforms"
rm -f $dir/.error  # start from a clean marker, as the other stages do
for n in 1 2 3; do
  ( ali-to-post "ark:gunzip -c $dir/pre.${n}.ali.gz|" ark:- | \
      weight-silence-post 0.0 $silphonelist $srcalimodel ark:- ark:- | \
      gmm-post-to-gpost $srcalimodel "${defaultfeatspart[$n]}" ark,o:- ark:- | \
      gmm-est-et --spk2utt=ark:$dir/train$n.spk2utt --verbose=1 $srcmodel $et \
        "${ldafeatspart[$n]}" ark,s,cs:- ark:$dir/$n.trans ark,t:$dir/$n.warp ) \
    2> $dir/et_trans.$n.log || touch $dir/.error &
done
wait
[ -f $dir/.error ] && echo error computing ET transforms && exit 1
# Remove the first-pass alignments of ALL splits.  A glob is used because
# $n is only the last loop value (3) once the loop above has finished.
rm $dir/pre.?.ali.gz
# Reuse the topology file of the source system.
cp $srcdir/topo $dir
# Align all training data using old model (and old graphs, since we
# use the same data-subset as last time).
# Note: a small number of utterances don't have graphs at this stage because
# of differences in how the data splitting is done when we switch to using
# speaker information.
echo "Aligning all training data with old adapted model"
rm -f $dir/.error
for n in 1 2 3; do
  (
    gmm-align-compiled $scale_opts --beam=8 --retry-beam=40 $srcmodel \
      "ark:gunzip -c $dir/srcgraphs${n}.fsts.gz|" "${featspart[$n]}" \
      "ark:|gzip -c >$dir/0.${n}.ali.gz" \
      2> $dir/old_align.${n}.log \
      || touch $dir/.error
  ) &
done
wait
if [ -f $dir/.error ]; then
  echo align error RE old system && exit 1
fi

# These graphs are no longer needed.
rm $dir/srcgraphs*.fsts.gz
# Accumulate tree statistics from the source model, the full feature
# pipeline and the alignments of all three splits.
if ! acc-tree-stats --ci-phones=$silphonelist $srcmodel "$feats" \
     "ark:gunzip -c $dir/0.?.ali.gz|" $dir/treeacc 2> $dir/acc.tree.log; then
  exit 1
fi

# The next few commands are involved with making the questions
# for tree clustering.  The extra complexity vs. the RM recipe has
# to do with the desire to ask questions about the "real" phones
# ignoring things like stress and position-in-word, and ask questions
# separately about stress and position-in-word.
# Don't include silences as things to be clustered -> --nosil option.
scripts/make_shared_phones.sh --nosil \
  | scripts/sym2int.pl data/phones.txt > $dir/phone_sets.list
if ! cluster-phones $dir/treeacc $dir/phone_sets.list $dir/questions.txt \
     2> $dir/cluster_phones.log; then
  exit 1
fi

# Convert the clustered questions to symbols, append the extra questions,
# and convert everything back to integer ids.
scripts/int2sym.pl data/phones.txt < $dir/questions.txt > $dir/questions_syms.txt
scripts/make_extra_questions.sh \
  | cat $dir/questions_syms.txt - > $dir/questions_syms_all.txt
scripts/sym2int.pl data/phones.txt < $dir/questions_syms_all.txt > $dir/questions_all.txt
if ! compile-questions $dir/topo $dir/questions_all.txt $dir/questions.qst \
     2>$dir/compile_questions.log; then
  exit 1
fi

# Tree roots, then the tree itself with at most $numleaves leaves.
scripts/make_roots.sh > $dir/roots_syms.txt
scripts/sym2int.pl --ignore-oov data/phones.txt < $dir/roots_syms.txt > $dir/roots.txt
if ! build-tree --verbose=1 --max-leaves=$numleaves \
     $dir/treeacc $dir/roots.txt \
     $dir/questions.qst $dir/topo $dir/tree 2> $dir/train_tree.log; then
  exit 1
fi

# the sgmm-init program accepts a GMM, so we just create a temporary GMM "0.gmm"
if ! gmm-init-model --write-occs=$dir/0.occs \
     $dir/tree $dir/treeacc $dir/topo $dir/0.gmm 2> $dir/init_gmm.log; then
  exit 1
fi
if ! sgmm-init --spk-space-dim=40 $dir/topo $dir/tree $ubm $dir/0.mdl \
     2> $dir/init_sgmm.log; then
  exit 1
fi

# The temporary GMM and the tree statistics are not needed any more.
rm $dir/0.gmm
rm $dir/treeacc
# Run sgmm-gselect with the initial SGMM on each data split, passing that
# split's gzipped preselect file, and store its text output gzipped in
# $dir/gselectN.gz, one background job per split.
#
# Each job runs in a subshell with pipefail enabled: without it the exit
# status of "sgmm-gselect | gzip" is gzip's alone, so a failing sgmm-gselect
# would never create $dir/.error and the script would carry on with an
# empty or truncated gselect file.
rm -f $dir/.error
for n in 1 2 3; do
  ( set -o pipefail
    sgmm-gselect "--preselect=ark:gunzip -c $dir/preselect.${n}.gz|" \
      $dir/0.mdl "${featspart[$n]}" ark,t:- 2>$dir/gselect$n.log | \
      gzip -c > $dir/gselect${n}.gz ) || touch $dir/.error &
done
wait
[ -f $dir/.error ] && echo "Error in gselect phase" && exit 1;
# Convert the alignments generated from the previous model so that they can
# be used as initial alignments with the new tree and model.
# Not parallelized: mostly I/O.
for n in 1 2 3; do
  if ! convert-ali $srcmodel $dir/0.mdl $dir/tree \
       "ark:gunzip -c $dir/0.$n.ali.gz|" \
       "ark:|gzip -c > $dir/cur$n.ali.gz" \
       2>$dir/convert.$n.log; then
    exit 1
  fi
done
rm $dir/0.?.ali.gz

# Make training graphs for the new tree and initial model, one background
# job per split; a failing job leaves $dir/.error behind.
echo "Compiling training graphs"
rm -f $dir/.error
for n in 1 2 3; do
  (
    compile-train-graphs $dir/tree $dir/0.mdl data/L.fst ark:$dir/train${n}.tra \
      "ark:|gzip -c > $dir/graphs${n}.fsts.gz" \
      2>$dir/compile_graphs.${n}.log \
      || touch $dir/.error
  ) &
done
wait
if [ -f $dir/.error ]; then
  echo compile-graphs error && exit 1
fi
x=0
while [ $x -lt $numiters ]; do
echo "Pass $x"
if echo $realign_iters | grep -w $x >/dev/null; then
echo "Aligning data"
rm -f $dir/.error
for n in 1 2 3; do
sgmm-align-compiled ${spkvecs_opt[$n]} --utt2spk=ark:$dir/train$n.utt2spk \
"--gselect=ark,s,cs:gunzip -c $dir/gselect$n.gz|" \
$scale_opts --beam=8 --retry-beam=40 $dir/$x.mdl \
"ark:gunzip -c $dir/graphs${n}.fsts.gz|" "${featspart[$n]}" \
"ark:|gzip -c >$dir/cur${n}.ali.gz" 2> $dir/align.$x.$n.log \
|| touch $dir/.error &
done
wait
[ -f $dir/.error ] && echo error aligning data && exit 1
fi
if echo $spkvec_iters | grep -w $x >/dev/null; then
echo "Computing speaker vectors"
for n in 1 2 3; do
( ali-to-post "ark:gunzip -c $dir/cur${n}.ali.gz|" ark:- | \
weight-silence-post 0.01 $silphonelist $dir/$x.mdl ark:- ark:- | \
sgmm-est-spkvecs --spk2utt=ark:$dir/train$n.spk2utt ${spkvecs_opt[$n]} \
"--gselect=ark,s,cs:gunzip -c $dir/gselect$n.gz|" \
--rand-prune=$randprune $dir/$x.mdl \
"${featspart[$n]}" ark,s,cs:- ark:$dir/tmp$n.vecs && mv $dir/tmp$n.vecs $dir/cur$n.vecs ) 2>$dir/spkvecs.$x.$n.log \
|| touch $dir/.error &
spkvecs_opt[$n]="--spk-vecs=ark:$dir/cur$n.vecs"
done
wait;
[ -f $dir/.error ] && echo error computing speaker vectors && exit 1
fi
if [ $x -eq 0 ]; then
flags=vwcS
elif [ $[$x%2] -eq 1 -a $x -gt 4 ]; then # even iters after 4 (i.e. starting from 6)...