Commit a6f1b688 authored by Korbinian Riedhammer
Browse files

RM/s3 example scripts for semi-continuous models

git-svn-id: https://svn.code.sf.net/p/kaldi/code/sandbox/discrim@447 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
parent d82f2b38
# Environment setup, meant to be sourced from the recipe directory: the Kaldi
# root is taken to be three levels above the current working directory.
root="$(pwd)/../../.."

# Prepend the Kaldi and OpenFst binary directories exactly once. (Both the old
# and the new revision of this line were present; the old one was a strict
# subset of this one and only duplicated every PATH entry.)
export PATH="${root}/src/bin:${root}/tools/openfst/bin:${root}/src/fstbin/:${root}/src/gmmbin/:${root}/src/featbin/:${root}/src/fgmmbin:${root}/src/sgmmbin:${root}/src/lm:${root}/src/latbin:${root}/src/tiedbin:${PATH}"

# Force the C locale for the tools run by the recipe.
export LC_ALL=C
# NOTE(review): LC_LOCALE_ALL is not a standard locale variable; kept only so
# that anything already reading it keeps working.
export LC_LOCALE_ALL=C
......
#!/bin/bash
# Copyright 2010-2011 Microsoft Corporation
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# Builds the decoding graph <graphdir>/HCLG.fst for a model directory.
#
# Usage: scripts/mkgraph.sh [--mono] [--clean] <test-lang-dir> <model-dir> <graphdir>
#   --mono   use context size 1 and central position 0
#   --clean  remove the cached intermediate FSTs in <test-lang-dir>/tmp first
#
# LG and CLG are cached in <test-lang-dir>/tmp; Ha, HCLGa and HCLG are written
# to <graphdir>. Each stage is rebuilt only when its output is missing or
# older than the files it is built from.

N=5 # --context-size for fstcomposecontext
P=1 # --central-position for fstcomposecontext
# NOTE(review): N=5 with P=1 is not a centred context window -- confirm this
# matches the tree the model was trained with.
clean=false

# Consume leading --mono / --clean flags in either order. "$1" is quoted so
# the test stays well-formed once the arguments run out.
for _ in 1 2 3; do
  if [[ "$1" == "--mono" ]]; then
    N=1
    P=0
    shift
  fi
  if [[ "$1" == "--clean" ]]; then
    clean=true
    shift
  fi
done

if [[ $# -ne 3 ]]; then
  echo "Usage: scripts/mkgraph.sh <test-lang-dir> <model-dir> <graphdir>"
  echo "e.g.: scripts/mkgraph.sh data/lang_test exp/tri1/ exp/tri1/graph"
  exit 1
fi

if [ -f path.sh ]; then . path.sh; fi

lang=$1
tree=$2/tree
model=$2/final.mdl
dir=$3

# -f: a missing cache directory is not an error when cleaning.
if $clean; then rm -rf "$lang/tmp"; fi

mkdir -p "$dir" || exit 1

tscale=1.0
loopscale=0.1

mkdir -p "$lang/tmp" || exit 1
lg="$lang/tmp/LG.fst"

# LG = L_disambig composed with G, determinized and minimized.
# (-f means file exists, -ot means older than.)
if [[ ! -f "$lg" || "$lg" -ot "$lang/G.fst" ||
  "$lg" -ot "$lang/L_disambig.fst" ]]; then
  fsttablecompose "$lang/L_disambig.fst" "$lang/G.fst" \
    | fstdeterminizestar --use-log=true \
    | fstminimizeencoded > "$lg" || exit 1
  fstisstochastic "$lg" || echo "warning: LG not stochastic."
fi

if [ ! -f "$lang/phones_disambig.txt" ]; then
  echo "No such file $lang/phones_disambig.txt (supplied a training lang/ directory?)"
  exit 1
fi

# Second column of every line containing '#' in phones_disambig.txt.
grep '#' "$lang/phones_disambig.txt" | awk '{print $2}' \
  > "$lang/tmp/disambig_phones.list"

clg="$lang/tmp/CLG_${N}_${P}.fst"
ilabels="$lang/tmp/ilabels_${N}_${P}"

# CLG depends on the cached LG in $lang/tmp. The check used to look at
# $lang/LG.fst, which is never written, so a rebuilt LG did not refresh CLG.
if [[ ! -f "$clg" || "$clg" -ot "$lg" ]]; then
  # Exit on failure so a truncated CLG is not mistaken for an up-to-date one
  # on the next run.
  fstcomposecontext --context-size="$N" --central-position="$P" \
    --read-disambig-syms="$lang/tmp/disambig_phones.list" \
    --write-disambig-syms="$lang/tmp/disambig_ilabels_${N}_${P}.list" \
    "$ilabels" < "$lg" > "$clg" || exit 1
  fstisstochastic "$clg" || echo "warning: CLG not stochastic."
fi

if [[ ! -f "$dir/Ha.fst" || "$dir/Ha.fst" -ot "$model" ]]; then
  make-h-transducer --disambig-syms-out="$dir/disambig_tid.list" \
    --transition-scale="$tscale" "$ilabels" "$tree" "$model" \
    > "$dir/Ha.fst" || exit 1
fi

if [[ ! -f "$dir/HCLGa.fst" || "$dir/HCLGa.fst" -ot "$dir/Ha.fst" ||
  "$dir/HCLGa.fst" -ot "$clg" ]]; then
  fsttablecompose "$dir/Ha.fst" "$clg" \
    | fstdeterminizestar --use-log=true \
    | fstrmsymbols "$dir/disambig_tid.list" \
    | fstrmepslocal \
    | fstminimizeencoded > "$dir/HCLGa.fst" || exit 1
  fstisstochastic "$dir/HCLGa.fst" || echo "HCLGa is not stochastic"
fi

if [[ ! -f "$dir/HCLG.fst" || "$dir/HCLG.fst" -ot "$dir/HCLGa.fst" ]]; then
  add-self-loops --self-loop-scale="$loopscale" --reorder=true \
    "$model" < "$dir/HCLGa.fst" > "$dir/HCLG.fst" || exit 1

  if [[ "$tscale" == "1.0" && "$loopscale" == "1.0" ]]; then
    # No point doing this test if the scales are not 1, as it is bound to fail.
    fstisstochastic "$dir/HCLG.fst" || echo "Final HCLG is not stochastic."
  fi
fi
......@@ -69,11 +69,11 @@ if [ -z "$graphs" ]; then # --graphs option not supplied [-z means empty string]
# compute integer form of transcripts.
scripts/sym2int.pl --ignore-first-field $lang/words.txt < $data/text > $dir/train.tra \
|| exit 1;
gmm-align $scale_opts --beam=8 --retry-beam=40 $dir/tree $dir/model $lang/L.fst \
gmm-align $scale_opts --beam=8 --retry-beam=40 $dir/tree $dir/final.mdl $lang/L.fst \
"$feats" ark:$dir/train.tra ark:$dir/ali 2> $dir/align.log || exit 1;
rm $dir/train.tra
else
gmm-align-compiled $scale_opts --beam=8 --retry-beam=40 $dir/model \
gmm-align-compiled $scale_opts --beam=8 --retry-beam=40 $dir/final.mdl \
"$graphs" "$feats" ark:$dir/ali 2> $dir/align.log || exit 1;
fi
......
#!/bin/bash
# Copyright 2010-2011 Microsoft Corporation
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# Decodes a test set with tied-diag-gmm-decode-faster, using delta-delta
# features with per-speaker cepstral mean subtraction, then scores the output.
#
# Arguments:
#   $1  model dir  (must contain final.mdl and graph/HCLG.fst)
#   $2  data dir   (spk2utt, utt2spk, feats.scp, text are read from it)
#   $3  lang dir   (words.txt is read from it)
#   $4  decode dir (created; receives test.tra, test.ali, decode.log, wer)

if [[ $# -ne 4 ]]; then
  echo "Usage: steps/decode-tied.sh <model-dir> <data-dir> <lang-dir> <decode-dir>"
  echo " e.g.: steps/decode-tied.sh exp/tri1-2lvl data/test_feb89 data/lang_test exp/tri1-2lvl/decode_feb89"
  exit 1
fi

srcdir=$1
data=$2
lang=$3
dir=$4
graphdir="$srcdir/graph"

mkdir -p "$dir" || exit 1

if [ -f path.sh ]; then . path.sh; fi

if [ ! -f "$srcdir/final.mdl" ]; then
  echo "No model file $srcdir/final.mdl"
  exit 1
fi

# The graph must exist and must not predate the model it was built from.
if [[ ! -f "$graphdir/HCLG.fst" || "$graphdir/HCLG.fst" -ot "$srcdir/final.mdl" ]]; then
  echo "Graph $graphdir/HCLG.fst does not exist or is too old."
  exit 1
fi

# We only do one decoding pass, so there is no point caching the
# CMVN stats -- we make them part of a pipe.
# The pipe is handed to the decoder as one string, so $data cannot be quoted
# inside it: a data dir containing whitespace is not supported here.
feats="ark:compute-cmvn-stats --spk2utt=ark:$data/spk2utt scp:$data/feats.scp ark:- | apply-cmvn --norm-vars=false --utt2spk=ark:$data/utt2spk ark:- scp:$data/feats.scp ark:- | add-deltas ark:- ark:- |"

# For Resource Management, we use beam of 30 and acwt of 1/7.
# More normal, LVCSR setups would have a beam of 13 and acwt of 1/15 or so;
# decoding an LVCSR setup with a beam this wide would be very slow.
tied-diag-gmm-decode-faster --beam=30.0 --acoustic-scale=0.1429 \
  --word-symbol-table="$lang/words.txt" \
  "$srcdir/final.mdl" "$graphdir/HCLG.fst" "$feats" \
  "ark,t:$dir/test.tra" "ark,t:$dir/test.ali" \
  2> "$dir/decode.log" || exit 1

# In this setup there are no non-scored words, so scoring is simple.
# The ,p option lets it score partial output without dying.
scripts/sym2int.pl --ignore-first-field "$lang/words.txt" "$data/text" \
  | compute-wer --mode=present ark:- "ark,p:$dir/test.tra" > "$dir/wer" 2>&1
#!/bin/bash
# Copyright 2010-2011 Microsoft Corporation
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# Decodes a test set with tied-full-gmm-decode-faster, using delta-delta
# features with per-speaker cepstral mean subtraction, then scores the output.
#
# Arguments:
#   $1  model dir  (must contain final.mdl and graph/HCLG.fst)
#   $2  data dir   (spk2utt, utt2spk, feats.scp, text are read from it)
#   $3  lang dir   (words.txt is read from it)
#   $4  decode dir (created; receives test.tra, test.ali, decode.log, wer)

if [[ $# -ne 4 ]]; then
  echo "Usage: steps/decode-tied.sh <model-dir> <data-dir> <lang-dir> <decode-dir>"
  echo " e.g.: steps/decode-tied.sh exp/tri1-2lvl data/test_feb89 data/lang_test exp/tri1-2lvl/decode_feb89"
  exit 1
fi

srcdir=$1
data=$2
lang=$3
dir=$4
graphdir="$srcdir/graph"

mkdir -p "$dir" || exit 1

if [ -f path.sh ]; then . path.sh; fi

if [ ! -f "$srcdir/final.mdl" ]; then
  echo "No model file $srcdir/final.mdl"
  exit 1
fi

# The graph must exist and must not predate the model it was built from.
if [[ ! -f "$graphdir/HCLG.fst" || "$graphdir/HCLG.fst" -ot "$srcdir/final.mdl" ]]; then
  echo "Graph $graphdir/HCLG.fst does not exist or is too old."
  exit 1
fi

# We only do one decoding pass, so there is no point caching the
# CMVN stats -- we make them part of a pipe.
# The pipe is handed to the decoder as one string, so $data cannot be quoted
# inside it: a data dir containing whitespace is not supported here.
feats="ark:compute-cmvn-stats --spk2utt=ark:$data/spk2utt scp:$data/feats.scp ark:- | apply-cmvn --norm-vars=false --utt2spk=ark:$data/utt2spk ark:- scp:$data/feats.scp ark:- | add-deltas ark:- ark:- |"

# For Resource Management, we use beam of 30 and acwt of 1/7.
# More normal, LVCSR setups would have a beam of 13 and acwt of 1/15 or so;
# decoding an LVCSR setup with a beam this wide would be very slow.
tied-full-gmm-decode-faster --beam=30.0 --acoustic-scale=0.1429 \
  --word-symbol-table="$lang/words.txt" \
  "$srcdir/final.mdl" "$graphdir/HCLG.fst" "$feats" \
  "ark,t:$dir/test.tra" "ark,t:$dir/test.ali" \
  2> "$dir/decode.log" || exit 1

# In this setup there are no non-scored words, so scoring is simple.
# The ,p option lets it score partial output without dying.
scripts/sym2int.pl --ignore-first-field "$lang/words.txt" "$data/text" \
  | compute-wer --mode=present ark:- "ark,p:$dir/test.tra" > "$dir/wer" 2>&1
#!/bin/bash
# Copyright 2010-2011 Microsoft Corporation Arnab Ghoshal
# Univ. Erlangen-Nuremberg Korbinian Riedhammer
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# To be run from ..
# Tied-mixture triphone training with full-covariance codebooks and a
# two-level tree, using delta-delta features and cepstral mean normalization.
# It starts from an existing alignment directory (e.g. exp/tri1_ali), supplied
# as an argument, which is assumed to be built using the same type of features.
#
# Arguments:
#   $1  data dir          $6   target total number of Gaussians
#   $2  lang dir          $7   target number of tree leaves
#   $3  alignment dir     $8   codebook init style (0 or 1)
#   $4  experiment dir    $9   rho for sufficient-statistics smoothing
#   $5  number of codebooks   $10  rho for model re-estimation smoothing

if [[ $# -ne 10 ]]; then
  echo "Usage: steps/train-2lvl-5.sh <data-dir> <lang-dir> <ali-dir> <exp-dir> <num-codebooks> <num-gaussians> <num-tree-leaves> <init-style> <rho-stats> <rho-reest>"
  echo " e.g.: steps/train-2lvl-5.sh data/train data/lang exp/tri1_ali exp/tri1-2lvl-5 100 1024 1800 0 0 0"
  exit 1
fi

if [ -f path.sh ]; then . path.sh; fi

data=$1
lang=$2
alidir=$3
dir=$4

if [[ ! -f "$alidir/final.mdl" || ! -f "$alidir/ali" ]]; then
  echo "Error: alignment dir $alidir does not contain final.mdl and ali"
  exit 1
fi

scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1"
realign_iters="5 10 15 20"
silphonelist=$(cat "$lang/silphones.csl") || exit 1
numiters=25          # Number of iterations of training
max_leaves_first=$5  # Number of codebooks
max_leaves_second=$7 # target num-leaves in tree building.
totgauss=$6          # Target total #Gaussians in codebooks
mingauss=3           # minimum size of codebook
init_style=$8        # (0, init-tied-codebooks) (1, tied-lbg)
rho_stats=$9         # set to > 0 to activate prop/interp of suff. stats. (weights only)
rho_iters=${10}      # set to > 0 to activate smoothing of new model with prior model (weights only)
ctx_width=5

psmoothing=""
if [ "$rho_iters" != "0" ]; then
  psmoothing="--smoothing-weight=$rho_iters --interpolate-weights"
fi

mkdir -p "$dir" || exit 1

# Handed to the Kaldi tools as one string, so $data/$alidir cannot be quoted
# inside it.
feats="ark:apply-cmvn --norm-vars=false --utt2spk=ark:$data/utt2spk ark:$alidir/cmvn.ark scp:$data/feats.scp ark:- | add-deltas ark:- ark:- |"

# compute integer form of transcripts.
scripts/sym2int.pl --ignore-first-field "$lang/words.txt" < "$data/text" \
  > "$dir/train.tra" || exit 1

echo "Accumulating tree stats"
acc-tree-stats --context-width="$ctx_width" --ci-phones="$silphonelist" \
  "$alidir/final.mdl" "$feats" \
  "ark:$alidir/ali" "$dir/treeacc" 2> "$dir/acc.tree.log" || exit 1

echo "Computing questions for tree clustering"
# Last field of every phones.txt line, dropping the entry that is exactly 0.
awk '{print $NF}' "$lang/phones.txt" | grep -v -w 0 > "$dir/phones.list"
cluster-phones "$dir/treeacc" "$dir/phones.list" "$dir/questions.txt" \
  2> "$dir/questions.log" || exit 1
scripts/int2sym.pl "$lang/phones.txt" < "$dir/questions.txt" \
  > "$dir/questions_syms.txt"
compile-questions "$lang/topo" "$dir/questions.txt" "$dir/questions.qst" \
  2> "$dir/compile_questions.log" || exit 1

# Have to make silence root not-shared because we will not split it.
scripts/make_roots.pl --separate "$lang/phones.txt" "$silphonelist" shared split \
  > "$dir/roots.txt" 2> "$dir/roots.log" || exit 1

# build tree, make sure to disable leaf clustering
echo "Building tree"
build-tree-two-level --verbose=1 --cluster-leaves=false \
  --max-leaves_first="$max_leaves_first" \
  --max-leaves_second="$max_leaves_second" \
  --context-width="$ctx_width" \
  "$dir/treeacc" "$dir/roots.txt" \
  "$dir/questions.qst" "$lang/topo" "$dir/tree" "$dir/tree.map" \
  2> "$dir/train_tree.log" || exit 1

# codebook initialization as desired...
if [[ "$init_style" == "0" ]]; then
  echo "Initializing codebooks based on tree stats"
  init-tied-codebooks --split-gaussians=true --full=true \
    --min-gauss="$mingauss" --max-gauss="$totgauss" \
    "$dir/tree" "$dir/treeacc" "$dir/ubm-full" "$dir/tree.map" \
    2> "$dir/init-tied-codebooks.err" > "$dir/init-tied-codebooks.out" || exit 1
elif [[ "$init_style" == "1" ]]; then
  echo "Initializing codebooks by LBG on (ali<->features)"
  tied-lbg --full=true --min-gauss="$mingauss" --max-gauss="$totgauss" \
    --remove-low-count-gaussians=false --interim-em=5 \
    "$alidir/tree" "$dir/tree" "$lang/topo" "$feats" "ark:$alidir/ali" \
    "$dir/ubm-full" "$dir/tree.map" \
    2> "$dir/tied-lbg.err" > "$dir/tied-lbg.out" || exit 1
else
  echo "Invalid codebook initialization: $init_style"
  exit 1
fi

echo "Initializing model"
# Collect the codebook files by suffix length (1, 2, then 3 characters), in
# the same order the brace pattern used to produce. nullglob drops a pattern
# that matches nothing instead of passing it on literally as a file name.
# presumably the suffix is the codebook index -- verify against the init tools
shopt -s nullglob
codebooks=("$dir"/ubm-full.? "$dir"/ubm-full.?? "$dir"/ubm-full.???)
shopt -u nullglob
if ((${#codebooks[@]} == 0)); then
  echo "Error: no codebooks found in $dir (expected $dir/ubm-full.*)"
  exit 1
fi
tied-full-gmm-init-model "$dir/tree" "$lang/topo" "$dir/tree.map" \
  "${codebooks[@]}" "$dir/1.mdl" 2> "$dir/init_model.log" || exit 1

rm "$dir/treeacc"

# Convert alignments generated from cont/triphone model, to use as initial alignments.
convert-ali "$alidir/final.mdl" "$dir/1.mdl" "$dir/tree" \
  "ark:$alidir/ali" "ark:$dir/cur.ali" 2> "$dir/convert.log" || exit 1

# Debug step only: convert back and check they're the same.
convert-ali "$dir/1.mdl" "$alidir/final.mdl" "$alidir/tree" \
  "ark:$dir/cur.ali" ark:- 2> /dev/null \
  | cmp - "$alidir/ali" || exit 1

# Make training graphs
echo "Compiling training graphs"
compile-train-graphs "$dir/tree" "$dir/1.mdl" "$lang/L.fst" "ark:$dir/train.tra" \
  "ark:|gzip -c >$dir/graphs.fsts.gz" 2> "$dir/compile_graphs.log" || exit 1

x=1
while ((x < numiters)); do
  echo "Pass $x"
  if echo "$realign_iters" | grep -q -w "$x"; then
    echo "Aligning data"
    # $scale_opts is left unquoted on purpose: it holds several options.
    tied-full-gmm-align-compiled $scale_opts --beam=8 --retry-beam=40 \
      "$dir/$x.mdl" "ark:gunzip -c $dir/graphs.fsts.gz|" "$feats" \
      "ark:$dir/cur.ali" 2> "$dir/align.$x.log" || exit 1
  fi

  tied-full-gmm-acc-stats-ali --binary=false "$dir/$x.mdl" "$feats" \
    "ark:$dir/cur.ali" "$dir/$x.acc" 2> "$dir/acc.$x.log" || exit 1

  # suff. stats smoothing?
  if [ "$rho_stats" != "0" ]; then
    smooth-stats-full --rho="$rho_stats" "$dir/tree" "$dir/tree.map" \
      "$dir/$x.acc" "$dir/$x.acc.tmp" \
      2> "$dir/smooth.$x.err" > "$dir/smooth.$x.out" || exit 1
    mv "$dir/$x.acc.tmp" "$dir/$x.acc" || exit 1
  fi

  # $psmoothing is left unquoted on purpose: empty, or several options.
  tied-full-gmm-est $psmoothing --write-occs="$dir/$x.occs" \
    "$dir/$x.mdl" "$dir/$x.acc" "$dir/$((x + 1)).mdl" \
    2> "$dir/update.$x.log" || exit 1
  #rm $dir/$x.mdl $dir/$x.acc
  x=$((x + 1))
done

# Here x == numiters. $x.mdl was written by the last pass, but the occupancies
# of that pass were written as $((x-1)).occs; $x.occs is never created, so
# linking to it left final.occs dangling. Old links are removed first so the
# script can be re-run in the same directory.
(
  cd "$dir" || exit 1
  rm -f final.mdl final.occs
  ln -s "$x.mdl" final.mdl
  ln -s "$((x - 1)).occs" final.occs
) || exit 1

echo Done
#!/bin/bash
# Copyright 2010-2011 Microsoft Corporation Arnab Ghoshal
# Univ. Erlangen-Nuremberg Korbinian Riedhammer
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# To be run from ..
# Tied-mixture triphone training with diagonal-covariance codebooks and a
# two-level tree, using delta-delta features and cepstral mean normalization.
# It starts from an existing alignment directory (e.g. exp/tri1_ali), supplied
# as an argument, which is assumed to be built using the same type of features.
#
# Arguments:
#   $1  data dir          $6   target total number of Gaussians
#   $2  lang dir          $7   target number of tree leaves
#   $3  alignment dir     $8   codebook init style (0 or 1)
#   $4  experiment dir    $9   rho for sufficient-statistics smoothing
#   $5  number of codebooks   $10  rho for model re-estimation smoothing

if [[ $# -ne 10 ]]; then
  echo "Usage: steps/train-2lvl-diag.sh <data-dir> <lang-dir> <ali-dir> <exp-dir> <num-codebooks> <num-gaussians> <num-tree-leaves> <init-style> <rho-stats> <rho-reest>"
  echo " e.g.: steps/train-2lvl-diag.sh data/train data/lang exp/tri1_ali exp/tri1-2lvl-diag 100 1024 1800 0 0 0"
  exit 1
fi

if [ -f path.sh ]; then . path.sh; fi

data=$1
lang=$2
alidir=$3
dir=$4

if [[ ! -f "$alidir/final.mdl" || ! -f "$alidir/ali" ]]; then
  echo "Error: alignment dir $alidir does not contain final.mdl and ali"
  exit 1
fi

scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1"
realign_iters="5 10 15 20"
silphonelist=$(cat "$lang/silphones.csl") || exit 1
numiters=25          # Number of iterations of training
max_leaves_first=$5  # Number of codebooks
max_leaves_second=$7 # target num-leaves in tree building.
totgauss=$6          # Target total #Gaussians in codebooks
mingauss=3           # minimum size of codebook
init_style=$8        # (0, init-tied-codebooks) (1, tied-lbg)
rho_stats=$9         # set to > 0 to activate prop/interp of suff. stats. (weights only)
rho_iters=${10}      # set to > 0 to activate smoothing of new model with prior model (weights only)

psmoothing=""
if [ "$rho_iters" != "0" ]; then
  psmoothing="--smoothing-weight=$rho_iters --interpolate-weights"
fi

mkdir -p "$dir" || exit 1

# Handed to the Kaldi tools as one string, so $data/$alidir cannot be quoted
# inside it.
feats="ark:apply-cmvn --norm-vars=false --utt2spk=ark:$data/utt2spk ark:$alidir/cmvn.ark scp:$data/feats.scp ark:- | add-deltas ark:- ark:- |"

# compute integer form of transcripts.
scripts/sym2int.pl --ignore-first-field "$lang/words.txt" < "$data/text" \
  > "$dir/train.tra" || exit 1

echo "Accumulating tree stats"
acc-tree-stats --ci-phones="$silphonelist" "$alidir/final.mdl" "$feats" \
  "ark:$alidir/ali" "$dir/treeacc" 2> "$dir/acc.tree.log" || exit 1

echo "Computing questions for tree clustering"
# Last field of every phones.txt line, dropping the entry that is exactly 0.
awk '{print $NF}' "$lang/phones.txt" | grep -v -w 0 > "$dir/phones.list"
cluster-phones "$dir/treeacc" "$dir/phones.list" "$dir/questions.txt" \
  2> "$dir/questions.log" || exit 1
scripts/int2sym.pl "$lang/phones.txt" < "$dir/questions.txt" \
  > "$dir/questions_syms.txt"
compile-questions "$lang/topo" "$dir/questions.txt" "$dir/questions.qst" \
  2> "$dir/compile_questions.log" || exit 1

# Have to make silence root not-shared because we will not split it.
scripts/make_roots.pl --separate "$lang/phones.txt" "$silphonelist" shared split \
  > "$dir/roots.txt" 2> "$dir/roots.log" || exit 1

# build tree, make sure to disable the leaf clustering
echo "Building tree"
build-tree-two-level --verbose=1 --cluster-leaves=false \
  --max-leaves_first="$max_leaves_first" \
  --max-leaves_second="$max_leaves_second" \
  "$dir/treeacc" "$dir/roots.txt" \
  "$dir/questions.qst" "$lang/topo" "$dir/tree" "$dir/tree.map" \
  2> "$dir/train_tree.log" || exit 1

# codebook initialization as desired...
if [[ "$init_style" == "0" ]]; then
  echo "Initializing codebooks based on tree stats"
  init-tied-codebooks --split-gaussians=true --full=false \
    --min-gauss="$mingauss" --max-gauss="$totgauss" \
    "$dir/tree" "$dir/treeacc" "$dir/ubm-diag" "$dir/tree.map" \
    2> "$dir/init-tied-codebooks.err" > "$dir/init-tied-codebooks.out" || exit 1
elif [[ "$init_style" == "1" ]]; then
  echo "Initializing codebooks by LBG on (ali<->features)"
  tied-lbg --full=false --min-gauss="$mingauss" --max-gauss="$totgauss" \
    --remove-low-count-gaussians=false --interim-em=5 \
    "$alidir/tree" "$dir/tree" "$lang/topo" "$feats" "ark:$alidir/ali" \
    "$dir/ubm-diag" "$dir/tree.map" \
    2> "$dir/tied-lbg.err" > "$dir/tied-lbg.out" || exit 1
else
  echo "Invalid codebook initialization: $init_style"
  exit 1
fi

echo "Initializing model"
# Collect the codebook files by suffix length, shortest first. Three-character
# suffixes are included so that codebooks beyond the two-character range are
# not silently left out; nullglob drops a pattern that matches nothing instead
# of passing it on literally as a file name.
# presumably the suffix is the codebook index -- verify against the init tools
shopt -s nullglob
codebooks=("$dir"/ubm-diag.? "$dir"/ubm-diag.?? "$dir"/ubm-diag.???)
shopt -u nullglob
if ((${#codebooks[@]} == 0)); then
  echo "Error: no codebooks found in $dir (expected $dir/ubm-diag.*)"
  exit 1
fi
tied-diag-gmm-init-model "$dir/tree" "$lang/topo" "$dir/tree.map" \
  "${codebooks[@]}" "$dir/1.mdl" 2> "$dir/init_model.log" || exit 1

rm "$dir/treeacc"

# Convert alignments generated from cont/triphone model, to use as initial alignments.
convert-ali "$alidir/final.mdl" "$dir/1.mdl" "$dir/tree" \
  "ark:$alidir/ali" "ark:$dir/cur.ali" 2> "$dir/convert.log" || exit 1

# Debug step only: convert back and check they're the same.
convert-ali "$dir/1.mdl" "$alidir/final.mdl" "$alidir/tree" \
  "ark:$dir/cur.ali" ark:- 2> /dev/null \
  | cmp - "$alidir/ali" || exit 1

# Make training graphs
echo "Compiling training graphs"
compile-train-graphs "$dir/tree" "$dir/1.mdl" "$lang/L.fst" "ark:$dir/train.tra" \
  "ark:|gzip -c >$dir/graphs.fsts.gz" 2> "$dir/compile_graphs.log" || exit 1

x=1
while ((x < numiters)); do
  echo "Pass $x"
  if echo "$realign_iters" | grep -q -w "$x"; then
    echo "Aligning data"
    # $scale_opts is left unquoted on purpose: it holds several options.
    tied-diag-gmm-align-compiled $scale_opts --beam=8 --retry-beam=40 \
      "$dir/$x.mdl" "ark:gunzip -c $dir/graphs.fsts.gz|" "$feats" \
      "ark:$dir/cur.ali" 2> "$dir/align.$x.log" || exit 1
  fi

  tied-diag-gmm-acc-stats-ali --binary=false "$dir/$x.mdl" "$feats" \
    "ark:$dir/cur.ali" "$dir/$x.acc" 2> "$dir/acc.$x.log" || exit 1

  # suff. stats smoothing?
  if [ "$rho_stats" != "0" ]; then
    smooth-stats-diag --rho="$rho_stats" "$dir/tree" "$dir/tree.map" \
      "$dir/$x.acc" "$dir/$x.acc.tmp" \
      2> "$dir/smooth.$x.err" > "$dir/smooth.$x.out" || exit 1
    mv "$dir/$x.acc.tmp" "$dir/$x.acc" || exit 1
  fi

  # $psmoothing is left unquoted on purpose: empty, or several options.
  tied-diag-gmm-est $psmoothing --write-occs="$dir/$x.occs" \
    "$dir/$x.mdl" "$dir/$x.acc" "$dir/$((x + 1)).mdl" \
    2> "$dir/update.$x.log" || exit 1
  #rm $dir/$x.mdl $dir/$x.acc
  x=$((x + 1))
done

# Here x == numiters. $x.mdl was written by the last pass, but the occupancies
# of that pass were written as $((x-1)).occs; $x.occs is never created, so
# linking to it left final.occs dangling. Old links are removed first so the
# script can be re-run in the same directory.
(
  cd "$dir" || exit 1
  rm -f final.mdl final.occs
  ln -s "$x.mdl" final.mdl
  ln -s "$((x - 1)).occs" final.occs
) || exit 1

echo Done
#!/bin/bash
# Copyright 2010-2011 Microsoft Corporation Arnab Ghoshal
# Univ. Erlangen-Nuremberg Korbinian Riedhammer
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# To be run from ..
# Triphone model training, using delta-delta features and cepstral
# mean normalization. It starts from an existing directory (e.g.
# exp/mono), supplied as an argument, which is assumed to be built using
# the same type of features.
if [ $# != 10 ]; then
echo "Usage: steps/train-2lvl.sh <data-dir> <lang-dir> <ali-dir> <exp-dir> <num-codebooks> <num-gaussians> <num-tree-leaves> <init-style> <rho-stats> <rho-reest>"
echo " e.g.: steps/train-2lvl.sh data/train data/lang exp/tri1_ali exp/tri1-2lvl 100 1024 1800 0 0 0"
exit 1;
fi
if [ -f path.sh ]; then . path.sh; fi
data=$1
lang=$2
alidir=$3
dir=$4
if [ ! -f $alidir/final.mdl -o ! -f $alidir/ali ]; then
echo "Error: alignment dir $alidir does not contain final.mdl and ali"
exit 1;