Commit 6f123ae8 authored by Korbinian Riedhammer
Browse files

updated rm/s3/run.sh and respective training scripts

git-svn-id: https://svn.code.sf.net/p/kaldi/code/sandbox/discrim@472 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
parent c86d4ba7
......@@ -62,6 +62,27 @@ steps/train_deltas.sh data/train data/lang exp/tri1_ali exp/tri2a
# decode tri2a
local/decode.sh steps/decode_deltas.sh exp/tri2a/decode
# Train a classic semi-continuous model using {diag,full} densities
# the numeric parameters following exp/tri1-semi are:
# number of gaussians, something like 4096 for diag, 2048 for full
# number of tree leaves
# type of suff-stats interpolation (0 regular, 1 preserves counts)
# rho-stats, rho value for the smoothing of the statistics (0 for no smoothing)
# rho-iters, rho value to interpolate the parameters with the last iteration (0 for no interpolation)
steps/train_semi_full.sh data/train data/lang exp/tri1_ali exp/tri1-semi 4096 1800 1 10 0
local/decode.sh steps/decode_tied_full.sh exp/tri1-semi
# Train a 2-lvl semi-continuous model using {diag,full} densities
# the numeric parameters following exp/tri1-2lvl are:
# number of codebooks, typically 1-3 times number of phones, the more, the faster
# total number of gaussians, something like 2048 for full, 4096 for diag
# number of tree leaves
# type of codebook initialization (0 init-tied-codebooks, 1 tied-lbg)
# type of suff-stats interpolation (0 regular, 1 preserves counts)
# rho-stats, rho value for the smoothing of the statistics (0 for no smoothing)
# rho-iters, rho value to interpolate the parameters with the last iteration (0 for no interpolation)
steps/train_2lvl_full.sh data/train data/lang exp/tri1_ali exp/tri1-2lvl 104 2048 2500 0 1 10 0
local/decode.sh steps/decode_tied_full.sh exp/tri1-2lvl
# train tri2b [LDA+MLLT]
steps/train_lda_mllt.sh data/train data/lang exp/tri1_ali exp/tri2b
# decode tri2b
......
#!/bin/bash
# Copyright 2010-2011 Microsoft Corporation
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# Decoding script for tied GMM models with diagonal densities (decoded
# with tied-diag-gmm-decode-faster), using delta-delta plus cepstral mean
# subtraction features.  The decoder output is scored with compute-wer.
#
# Arguments:
#   $1  model dir; must contain final.mdl and graph/HCLG.fst
#   $2  data dir; spk2utt, utt2spk, feats.scp and text are read from it
#   $3  lang dir; words.txt is read from it
#   $4  decode dir; created if missing, receives test.tra, test.ali,
#       decode.log and wer
#
# Exits with status 1 on wrong usage, a missing model, a missing or
# outdated decoding graph, or a decoder failure.

if [ $# != 4 ]; then
  # Use $0 so the message always names the script that was actually invoked.
  echo "Usage: $0 <model-dir> <data-dir> <lang-dir> <decode-dir>"
  echo " e.g.: $0 exp/tri1-2lvl data/test_feb89 data/lang_test exp/tri1-2lvl/decode_feb89"
  exit 1;
fi

srcdir=$1
data=$2
lang=$3
dir=$4
graphdir=$srcdir/graph

mkdir -p "$dir"

if [ -f path.sh ]; then . path.sh; fi

if [ ! -f "$srcdir/final.mdl" ]; then
  echo No model file $srcdir/final.mdl
  exit 1;
fi

# The graph has to be at least as new as the model it is used with.
if [[ ! -f "$graphdir/HCLG.fst" || "$graphdir/HCLG.fst" -ot "$srcdir/final.mdl" ]]; then
  echo "Graph $graphdir/HCLG.fst does not exist or is too old."
  exit 1;
fi

# We only do one decoding pass, so there is no point caching the
# CMVN stats-- we make them part of a pipe.
# NOTE(review): $data is interpolated into a command string that the decoder
# runs itself, so data dirs containing whitespace presumably remain
# unsupported even though the other expansions below are quoted.
feats="ark:compute-cmvn-stats --spk2utt=ark:$data/spk2utt scp:$data/feats.scp ark:- | apply-cmvn --norm-vars=false --utt2spk=ark:$data/utt2spk ark:- scp:$data/feats.scp ark:- | add-deltas ark:- ark:- |"

# For Resource Management, we use beam of 30 and acwt of 1/7.
# More normal, LVCSR setups would have a beam of 13 and acwt of 1/15 or so.
# If you decode with a beam of 20 on an LVCSR setup it will be very slow.
tied-diag-gmm-decode-faster --beam=30.0 --acoustic-scale=0.1429 \
  --word-symbol-table="$lang/words.txt" \
  "$srcdir/final.mdl" "$graphdir/HCLG.fst" "$feats" \
  "ark,t:$dir/test.tra" "ark,t:$dir/test.ali" \
  2> "$dir/decode.log" || exit 1;

# In this setup there are no non-scored words, so
# scoring is simple.
# the ,p option lets it score partial output without dying..
scripts/sym2int.pl --ignore-first-field "$lang/words.txt" "$data/text" \
  | compute-wer --mode=present ark:- "ark,p:$dir/test.tra" >& "$dir/wer"
#!/bin/bash
# Copyright 2010-2011 Microsoft Corporation
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# Decoding script for tied GMM models with full densities (decoded with
# tied-full-gmm-decode-faster), using delta-delta plus cepstral mean
# subtraction features.  The decoder output is scored with compute-wer.
#
# Arguments:
#   $1  model dir; must contain final.mdl and graph/HCLG.fst
#   $2  data dir; spk2utt, utt2spk, feats.scp and text are read from it
#   $3  lang dir; words.txt is read from it
#   $4  decode dir; created if missing, receives test.tra, test.ali,
#       decode.log and wer
#
# Exits with status 1 on wrong usage, a missing model, a missing or
# outdated decoding graph, or a decoder failure.

if [ $# != 4 ]; then
  # Use $0 so the message always names the script that was actually invoked.
  echo "Usage: $0 <model-dir> <data-dir> <lang-dir> <decode-dir>"
  echo " e.g.: $0 exp/tri1-2lvl data/test_feb89 data/lang_test exp/tri1-2lvl/decode_feb89"
  exit 1;
fi

srcdir=$1
data=$2
lang=$3
dir=$4
graphdir=$srcdir/graph

mkdir -p "$dir"

if [ -f path.sh ]; then . path.sh; fi

if [ ! -f "$srcdir/final.mdl" ]; then
  echo No model file $srcdir/final.mdl
  exit 1;
fi

# The graph has to be at least as new as the model it is used with.
if [[ ! -f "$graphdir/HCLG.fst" || "$graphdir/HCLG.fst" -ot "$srcdir/final.mdl" ]]; then
  echo "Graph $graphdir/HCLG.fst does not exist or is too old."
  exit 1;
fi

# We only do one decoding pass, so there is no point caching the
# CMVN stats-- we make them part of a pipe.
# NOTE(review): $data is interpolated into a command string that the decoder
# runs itself, so data dirs containing whitespace presumably remain
# unsupported even though the other expansions below are quoted.
feats="ark:compute-cmvn-stats --spk2utt=ark:$data/spk2utt scp:$data/feats.scp ark:- | apply-cmvn --norm-vars=false --utt2spk=ark:$data/utt2spk ark:- scp:$data/feats.scp ark:- | add-deltas ark:- ark:- |"

# For Resource Management, we use beam of 30 and acwt of 1/7.
# More normal, LVCSR setups would have a beam of 13 and acwt of 1/15 or so.
# If you decode with a beam of 20 on an LVCSR setup it will be very slow.
tied-full-gmm-decode-faster --beam=30.0 --acoustic-scale=0.1429 \
  --word-symbol-table="$lang/words.txt" \
  "$srcdir/final.mdl" "$graphdir/HCLG.fst" "$feats" \
  "ark,t:$dir/test.tra" "ark,t:$dir/test.ali" \
  2> "$dir/decode.log" || exit 1;

# In this setup there are no non-scored words, so
# scoring is simple.
# the ,p option lets it score partial output without dying..
scripts/sym2int.pl --ignore-first-field "$lang/words.txt" "$data/text" \
  | compute-wer --mode=present ark:- "ark,p:$dir/test.tra" >& "$dir/wer"
......@@ -108,7 +108,16 @@ fi
echo "Initializing model"
tied-full-gmm-init-model $dir/tree $lang/topo $dir/tree.map $dir/ubm-full.{?,??,???} $dir/1.mdl 2> $dir/init_model.log || exit 1;
if [ $max_leaves_first -lt 10 ]; then
tied-full-gmm-init-model $dir/tree $lang/topo $dir/tree.map $dir/ubm-full.? $dir/1.mdl 2> $dir/init_model.log || exit 1;
elif [ $max_leaves_first -lt 100 ]; then
tied-full-gmm-init-model $dir/tree $lang/topo $dir/tree.map $dir/ubm-full.{?,??} $dir/1.mdl 2> $dir/init_model.log || exit 1;
elif [ $max_leaves_first -lt 1000 ]; then
tied-full-gmm-init-model $dir/tree $lang/topo $dir/tree.map $dir/ubm-full.{?,??,???} $dir/1.mdl 2> $dir/init_model.log || exit 1;
else
echo "Error: Adapt script to allow more than 1000 codebooks!"
exit 1;
fi
rm $dir/treeacc
......
#!/bin/bash
# Copyright 2010-2011 Microsoft Corporation Arnab Ghoshal
# Univ. Erlangen-Nuremberg Korbinian Riedhammer
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# To be run from ..
#
# Trains a tied triphone system on a two-level tree with diagonal codebooks
# (tied-diag-gmm-* tools), using delta-delta features and cepstral mean
# normalization.  Training starts from the model and alignments of an
# existing system (e.g. exp/tri1_ali), supplied as an argument, which is
# assumed to be built using the same type of features.
#
# Arguments:
#   $1   data dir  (utt2spk, feats.scp and text are read from it)
#   $2   lang dir  (words.txt, phones.txt, silphones.csl, topo, L.fst)
#   $3   ali dir   (final.mdl, ali, tree and cmvn.ark are read from it)
#   $4   exp dir   (output; created if missing)
#   $5   number of codebooks (passed as --max-leaves_first)
#   $6   target total number of Gaussians in the codebooks
#   $7   target number of tree leaves (passed as --max-leaves_second)
#   $8   codebook initialization: 0 = init-tied-codebooks, 1 = tied-lbg
#   $9   stats smoothing type: 0 = regular, 1 = preserve counts
#   $10  rho for smoothing of the sufficient statistics; 0 disables it
#   $11  rho for smoothing the new model with the prior one; 0 disables it
#
# Exits with status 1 on wrong usage, invalid option values, or when one of
# the checked tools fails.

if [ $# != 11 ]; then
  echo "Usage: steps/train_2lvl_diag.sh <data-dir> <lang-dir> <ali-dir> <exp-dir> <num-codebooks> <num-gaussians> <num-tree-leaves> <init-type> <smooth-type> <rho-stats> <rho-reest>"
  echo " e.g.: steps/train_2lvl_diag.sh data/train data/lang exp/tri1_ali exp/tri1-2lvl-diag 100 1024 1800 0 1 10 0"
  exit 1;
fi

if [ -f path.sh ]; then . path.sh; fi

data=$1
lang=$2
alidir=$3
dir=$4

if [ ! -f "$alidir/final.mdl" ] || [ ! -f "$alidir/ali" ]; then
  echo "Error: alignment dir $alidir does not contain final.mdl and ali"
  exit 1;
fi

# Kept as arrays so that each option is passed as its own word.
scale_opts=(--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1)
realign_iters="5 10 15 20";
silphonelist=$(cat "$lang/silphones.csl") || exit 1;
numiters=25          # Number of iterations of training
max_leaves_first=$5  # Number of codebooks
max_leaves_second=$7 # target num-leaves in tree building.
totgauss=$6          # Target total #Gaussians in codebooks
mingauss=3           # minimum size of codebook
init_style=$8        # (0, init-tied-codebooks) (1, tied-lbg)
smooth_type=$9       # (0, regular, Interpolate1) (1, preserve counts, Interpolate2)
rho_stats=${10}      # set to > 0 to activate prop/interp of suff. stats. (weights only)
rho_iters=${11}      # set to > 0 to activate smoothing of new model with prior model (weights only)

# The codebook files are collected further down with patterns of one to
# three characters after "ubm-diag.", so larger setups cannot be handled.
if [ "$max_leaves_first" -ge 1000 ]; then
  echo "Error: Adapt script to allow more than 1000 codebooks!"
  exit 1;
fi

# smoothing of the new model with the prior model requested?
psmoothing=()
if [ "$rho_iters" != "0" ]; then
  psmoothing=(--smoothing-weight="$rho_iters" --interpolate-weights)
fi

# type of sufficient statistics smoothing
ssmoothing=()
case "$smooth_type" in
  0) ;;
  1) ssmoothing=(--preserve-counts) ;;
  *)
    echo "Invalid smoothing type $smooth_type"
    exit 1
    ;;
esac

mkdir -p "$dir"

feats="ark:apply-cmvn --norm-vars=false --utt2spk=ark:$data/utt2spk ark:$alidir/cmvn.ark scp:$data/feats.scp ark:- | add-deltas ark:- ark:- |"

# compute integer form of transcripts.
scripts/sym2int.pl --ignore-first-field "$lang/words.txt" < "$data/text" > "$dir/train.tra" \
  || exit 1;

echo "Accumulating tree stats"
acc-tree-stats --ci-phones="$silphonelist" "$alidir/final.mdl" "$feats" \
  "ark:$alidir/ali" "$dir/treeacc" 2> "$dir/acc.tree.log" || exit 1;

echo "Computing questions for tree clustering"
awk '{print $NF}' "$lang/phones.txt" | grep -v -w 0 > "$dir/phones.list"
cluster-phones "$dir/treeacc" "$dir/phones.list" "$dir/questions.txt" 2> "$dir/questions.log" || exit 1;
scripts/int2sym.pl "$lang/phones.txt" < "$dir/questions.txt" > "$dir/questions_syms.txt"
compile-questions "$lang/topo" "$dir/questions.txt" "$dir/questions.qst" 2> "$dir/compile_questions.log" || exit 1;

# Have to make silence root not-shared because we will not split it.
scripts/make_roots.pl --separate "$lang/phones.txt" "$silphonelist" shared split \
  > "$dir/roots.txt" 2> "$dir/roots.log" || exit 1;

# build tree, make sure to disable the leaf clustering
echo "Building tree"
build-tree-two-level --verbose=1 --cluster-leaves=false \
  --max-leaves_first="$max_leaves_first" \
  --max-leaves_second="$max_leaves_second" \
  "$dir/treeacc" "$dir/roots.txt" \
  "$dir/questions.qst" "$lang/topo" "$dir/tree" "$dir/tree.map" 2> "$dir/train_tree.log" || exit 1;

# codebook initialization as desired...
if [ "$init_style" == 0 ]; then
  echo "Initializing codebooks based on tree stats"
  init-tied-codebooks --split-gaussians=true --full=false --min-gauss=$mingauss --max-gauss="$totgauss" \
    "$dir/tree" "$dir/treeacc" "$dir/ubm-diag" "$dir/tree.map" 2> "$dir/init-tied-codebooks.err" > "$dir/init-tied-codebooks.out" || exit 1;
elif [ "$init_style" == 1 ]; then
  echo "Initializing codebooks by LBG on (ali<->features)"
  tied-lbg --full=false --min-gauss=$mingauss --max-gauss="$totgauss" --remove-low-count-gaussians=false --interim-em=5 \
    "$alidir/tree" "$dir/tree" "$lang/topo" "$feats" "ark:$alidir/ali" "$dir/ubm-diag" "$dir/tree.map" 2> "$dir/tied-lbg.err" > "$dir/tied-lbg.out" || exit 1;
else
  echo "Invalid codebook initialization: $init_style"
  exit 1
fi

echo "Initializing model"
# Collect the codebook files shortest suffix first (presumably numeric
# codebook indices -- confirm), keeping the order of the former
# {?,??} expansion.  With nullglob a pattern that matches nothing is dropped
# instead of being handed to the tool as a literal file name, and the
# three-character pattern picks up the files a setup with more than 100
# codebooks would otherwise lose.
shopt -s nullglob
codebooks=( "$dir"/ubm-diag.? "$dir"/ubm-diag.?? "$dir"/ubm-diag.??? )
shopt -u nullglob
if [ ${#codebooks[@]} -eq 0 ]; then
  echo "Error: no codebook files $dir/ubm-diag.* found"
  exit 1;
fi
tied-diag-gmm-init-model "$dir/tree" "$lang/topo" "$dir/tree.map" "${codebooks[@]}" "$dir/1.mdl" 2> "$dir/init_model.log" || exit 1;

rm "$dir/treeacc"

# Convert alignments generated from cont/triphone model, to use as initial alignments.
convert-ali "$alidir/final.mdl" "$dir/1.mdl" "$dir/tree" "ark:$alidir/ali" "ark:$dir/cur.ali" 2> "$dir/convert.log" || exit 1;

# Debug step only: convert back and check they're the same.
convert-ali "$dir/1.mdl" "$alidir/final.mdl" "$alidir/tree" "ark:$dir/cur.ali" ark:- \
  2>/dev/null | cmp - "$alidir/ali" || exit 1;

# Make training graphs
echo "Compiling training graphs"
compile-train-graphs "$dir/tree" "$dir/1.mdl" "$lang/L.fst" "ark:$dir/train.tra" \
  "ark:|gzip -c >$dir/graphs.fsts.gz" 2> "$dir/compile_graphs.log" || exit 1;

x=1
while [ $x -lt $numiters ]; do
  next=$((x + 1))
  echo Pass $x
  if echo "$realign_iters" | grep -qw "$x"; then
    echo "Aligning data"
    tied-diag-gmm-align-compiled "${scale_opts[@]}" --beam=8 --retry-beam=40 "$dir/$x.mdl" \
      "ark:gunzip -c $dir/graphs.fsts.gz|" "$feats" \
      "ark:$dir/cur.ali" 2> "$dir/align.$x.log" || exit 1;
  fi
  tied-diag-gmm-acc-stats-ali --binary=false "$dir/$x.mdl" "$feats" "ark:$dir/cur.ali" "$dir/$x.acc" 2> "$dir/acc.$x.log" || exit 1;

  # suff. stats smoothing?
  if [ "$rho_stats" != "0" ]; then
    smooth-stats-diag "${ssmoothing[@]}" --rho="$rho_stats" "$dir/tree" "$dir/tree.map" "$dir/$x.acc" "$dir/$x.acc.tmp" 2> "$dir/smooth.$x.err" > "$dir/smooth.$x.out" || exit 1;
    mv "$dir/$x.acc.tmp" "$dir/$x.acc" || exit 1;
  fi

  # The occupancies get the index of the model written in the same step, so
  # that the final.occs link created below points at an existing file.
  tied-diag-gmm-est "${psmoothing[@]}" --write-occs="$dir/$next.occs" "$dir/$x.mdl" "$dir/$x.acc" "$dir/$next.mdl" 2> "$dir/update.$x.log" || exit 1;
  #rm $dir/$x.mdl $dir/$x.acc
  x=$next
done

# Remove links left over from an earlier run before creating the new ones.
( cd "$dir" || exit 1; rm -f final.mdl final.occs; ln -s $x.mdl final.mdl; ln -s $x.occs final.occs )

echo Done
#!/bin/bash
# Copyright 2010-2011 Microsoft Corporation Arnab Ghoshal
# Univ. Erlangen-Nuremberg Korbinian Riedhammer
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# To be run from ..
#
# Trains a tied triphone system on a two-level tree with full codebooks
# (tied-full-gmm-* tools), using delta-delta features and cepstral mean
# normalization.  Training starts from the model and alignments of an
# existing system, supplied as an argument, which is assumed to be built
# using the same type of features.
# this training script assumes tri1 alignments to start from
#
# Arguments:
#   $1   data dir  (utt2spk, feats.scp and text are read from it)
#   $2   lang dir  (words.txt, phones.txt, silphones.csl, topo, L.fst)
#   $3   ali dir   (final.mdl, ali, tree and cmvn.ark are read from it)
#   $4   exp dir   (output; created if missing)
#   $5   number of codebooks (passed as --max-leaves_first)
#   $6   target total number of Gaussians in the codebooks
#   $7   target number of tree leaves (passed as --max-leaves_second)
#   $8   codebook initialization: 0 = init-tied-codebooks, 1 = tied-lbg
#   $9   stats smoothing type: 0 = regular, 1 = preserve counts
#   $10  rho for smoothing of the sufficient statistics; 0 disables it
#   $11  rho for smoothing the new model with the prior one; 0 disables it
#
# Exits with status 1 on wrong usage, invalid option values, or when one of
# the checked tools fails.

if [ $# != 11 ]; then
  echo "Usage: steps/train_2lvl_full.sh <data-dir> <lang-dir> <ali-dir> <exp-dir> <num-codebooks> <num-gaussians> <num-tree-leaves> <init-type> <smooth-type> <rho-stats> <rho-reest>"
  echo " e.g.: steps/train_2lvl_full.sh data/train data/lang exp/tri1_ali exp/tri1-2lvl 100 1024 1800 0 1 10 0"
  exit 1;
fi

if [ -f path.sh ]; then . path.sh; fi

data=$1
lang=$2
alidir=$3
dir=$4

if [ ! -f "$alidir/final.mdl" ] || [ ! -f "$alidir/ali" ]; then
  echo "Error: alignment dir $alidir does not contain final.mdl and ali"
  exit 1;
fi

# Kept as arrays so that each option is passed as its own word.
scale_opts=(--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1)
realign_iters="5 10 15 20";
silphonelist=$(cat "$lang/silphones.csl") || exit 1;
numiters=25          # Number of iterations of training
max_leaves_first=$5  # Number of codebooks
max_leaves_second=$7 # target num-leaves in tree building.
totgauss=$6          # Target total #Gaussians in codebooks
mingauss=3           # minimum size of codebook
init_style=$8        # (0, init-tied-codebooks) (1, tied-lbg)
smooth_type=$9       # (0, regular Interpolate1) (1, preserve-counts, Interpolate2)
rho_stats=${10}      # set to > 0 to activate smoothing of suff. stats. (weights only)
rho_iters=${11}      # set to > 0 to activate smoothing of new model with prior model (weights only)
emiters=5            # interim EM iterations for lbg-style initialization

# The codebook files are collected further down with patterns of one to
# three characters after "ubm-full.", so larger setups cannot be handled.
if [ "$max_leaves_first" -ge 1000 ]; then
  echo "Error: Adapt script to allow more than 1000 codebooks!"
  exit 1;
fi

# interpolation between iterations requested?
psmoothing=()
if [ "$rho_iters" != "0" ]; then
  psmoothing=(--interpolation-weight="$rho_iters" --interpolate-weights)
fi

# smoothing of stats requested?
ssmoothing=()
case "$smooth_type" in
  0) ;;
  1) ssmoothing=(--preserve-counts) ;;
  *)
    echo "Invalid smoothing type $smooth_type"
    exit 1
    ;;
esac

mkdir -p "$dir"

feats="ark:apply-cmvn --norm-vars=false --utt2spk=ark:$data/utt2spk ark:$alidir/cmvn.ark scp:$data/feats.scp ark:- | add-deltas ark:- ark:- |"

# compute integer form of transcripts.
scripts/sym2int.pl --ignore-first-field "$lang/words.txt" < "$data/text" > "$dir/train.tra" \
  || exit 1;

echo "Accumulating tree stats"
acc-tree-stats --ci-phones="$silphonelist" "$alidir/final.mdl" "$feats" \
  "ark:$alidir/ali" "$dir/treeacc" 2> "$dir/acc.tree.log" || exit 1;

echo "Computing questions for tree clustering"
awk '{print $NF}' "$lang/phones.txt" | grep -v -w 0 > "$dir/phones.list"
cluster-phones "$dir/treeacc" "$dir/phones.list" "$dir/questions.txt" 2> "$dir/questions.log" || exit 1;
scripts/int2sym.pl "$lang/phones.txt" < "$dir/questions.txt" > "$dir/questions_syms.txt"
compile-questions "$lang/topo" "$dir/questions.txt" "$dir/questions.qst" 2> "$dir/compile_questions.log" || exit 1;

# Have to make silence root not-shared because we will not split it.
scripts/make_roots.pl --separate "$lang/phones.txt" "$silphonelist" shared split \
  > "$dir/roots.txt" 2> "$dir/roots.log" || exit 1;

# build the 2-lvl tree, make sure to not cluster the leaves!
echo "Building tree"
build-tree-two-level --verbose=1 --cluster-leaves=false \
  --max-leaves_first="$max_leaves_first" \
  --max-leaves_second="$max_leaves_second" \
  "$dir/treeacc" "$dir/roots.txt" \
  "$dir/questions.qst" "$lang/topo" "$dir/tree" "$dir/tree.map" 2> "$dir/train_tree.log" || exit 1;

# codebook initialization as desired...
if [ "$init_style" == 0 ]; then
  echo "Initializing codebooks based on tree stats"
  init-tied-codebooks --split-gaussians=true --full=true --min-gauss=$mingauss --max-gauss="$totgauss" \
    "$dir/tree" "$dir/treeacc" "$dir/ubm-full" "$dir/tree.map" 2> "$dir/init-tied-codebooks.err" > "$dir/init-tied-codebooks.out" || exit 1;
elif [ "$init_style" == 1 ]; then
  echo "Initializing codebooks by LBG on (ali<->features)"
  tied-lbg --full=true --min-gauss=$mingauss --max-gauss="$totgauss" --remove-low-count-gaussians=false --interim-em=$emiters \
    "$alidir/tree" "$dir/tree" "$lang/topo" "$feats" "ark:$alidir/ali" "$dir/ubm-full" "$dir/tree.map" 2> "$dir/tied-lbg.err" > "$dir/tied-lbg.out" || exit 1;
else
  echo "Invalid codebook initialization: $init_style"
  exit 1;
fi

echo "Initializing model"
# Collect the codebook files shortest suffix first (presumably numeric
# codebook indices -- confirm), keeping the order of the former
# {?,??,???} expansion.  With nullglob a pattern that matches nothing is
# dropped instead of being handed to the tool as a literal file name, which
# is what happened with fewer than 101 (or 11) codebooks.
shopt -s nullglob
codebooks=( "$dir"/ubm-full.? "$dir"/ubm-full.?? "$dir"/ubm-full.??? )
shopt -u nullglob
if [ ${#codebooks[@]} -eq 0 ]; then
  echo "Error: no codebook files $dir/ubm-full.* found"
  exit 1;
fi
tied-full-gmm-init-model "$dir/tree" "$lang/topo" "$dir/tree.map" "${codebooks[@]}" "$dir/1.mdl" 2> "$dir/init_model.log" || exit 1;

rm "$dir/treeacc"

# Convert alignments generated from cont/triphone model, to use as initial alignments.
convert-ali "$alidir/final.mdl" "$dir/1.mdl" "$dir/tree" "ark:$alidir/ali" "ark:$dir/cur.ali" 2> "$dir/convert.log" || exit 1;

# Debug step only: convert back and check they're the same.
convert-ali "$dir/1.mdl" "$alidir/final.mdl" "$alidir/tree" "ark:$dir/cur.ali" ark:- \
  2>/dev/null | cmp - "$alidir/ali" || exit 1;

# Make training graphs
echo "Compiling training graphs"
compile-train-graphs "$dir/tree" "$dir/1.mdl" "$lang/L.fst" "ark:$dir/train.tra" \
  "ark:|gzip -c >$dir/graphs.fsts.gz" 2> "$dir/compile_graphs.log" || exit 1;

x=1
while [ $x -lt $numiters ]; do
  next=$((x + 1))
  echo Pass $x
  if echo "$realign_iters" | grep -qw "$x"; then
    echo "Aligning data"
    tied-full-gmm-align-compiled "${scale_opts[@]}" --beam=8 --retry-beam=40 "$dir/$x.mdl" \
      "ark:gunzip -c $dir/graphs.fsts.gz|" "$feats" \
      "ark:$dir/cur.ali" 2> "$dir/align.$x.log" || exit 1;
  fi
  tied-full-gmm-acc-stats-ali --binary=false "$dir/$x.mdl" "$feats" "ark:$dir/cur.ali" "$dir/$x.acc" 2> "$dir/acc.$x.log" || exit 1;

  # suff. stats smoothing?
  if [ "$rho_stats" != "0" ]; then
    smooth-stats-full "${ssmoothing[@]}" --rho="$rho_stats" "$dir/tree" "$dir/tree.map" "$dir/$x.acc" "$dir/$x.acc.tmp" 2> "$dir/smooth.$x.err" > "$dir/smooth.$x.out" || exit 1;
    mv "$dir/$x.acc.tmp" "$dir/$x.acc" || exit 1;
  fi

  # The occupancies get the index of the model written in the same step, so
  # that the final.occs link created below points at an existing file.
  tied-full-gmm-est "${psmoothing[@]}" --write-occs="$dir/$next.occs" "$dir/$x.mdl" "$dir/$x.acc" "$dir/$next.mdl" 2> "$dir/update.$x.log" || exit 1;
  #rm $dir/$x.mdl $dir/$x.acc
  x=$next
done

# Remove links left over from an earlier run before creating the new ones.
( cd "$dir" || exit 1; rm -f final.mdl final.occs; ln -s $x.mdl final.mdl; ln -s $x.occs final.occs )

echo Done
#!/bin/bash
# Copyright 2010-2011 Microsoft Corporation Arnab Ghoshal
# Univ. Erlangen-Nuremberg Korbinian Riedhammer
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# To be run from ..
# Triphone model training, using delta-delta features and cepstral
# mean normalization. It starts from an existing directory (e.g.
# exp/mono), supplied as an argument, which is assumed to be built using
# the same type of features.
if [ $# != 9 ]; then
echo "Usage: steps/train_semi_diag.sh <data-dir> <lang-dir> <ali-dir> <exp-dir> <num-gaussians> <num-tree-leaves> <smooth-type> <rho-stats> <rho-reest>"
echo " e.g.: steps/train_semi_diag.sh data/train data/lang exp/tri1_ali exp/tri1-semi 256 1800 1 10 0"
exit 1;
fi
if [ -f path.sh ]; then . path.sh; fi
data=$1
lang=$2
alidir=$3
dir=$4
if [ ! -f $alidir/final.mdl -o ! -f $alidir/ali ]; then
echo "Error: alignment dir $alidir does not contain final.mdl and ali"
exit 1;
fi
scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1"
realign_iters="5 10 15 20";
silphonelist=`cat $lang/silphones.csl`
numiters=25 # Number of iterations of training
max_leaves=$6 # target num-leaves in tree building.
totgauss=$5 # Target total #Gaussians in codebooks
smooth_type=$7 # (0, regular, Interpolate1) (1, preserve counts, Interpolate2)
rho_stats=$8 # set to > 0 to activate prop/interp of suff. stats. (weights only)
rho_iters=$9 # set to > 0 to actiavte smoothing of new model with prior model (weights only)
emiters=5 # number of initial EM iterations on codebook
emsize=1000 # number of training data for the EM iterations
psmoothing="" # will be filled, if parameter smoothing was requested
if [ "$rho_iters" != "0" ]; then
psmoothing="--smoothing-weight=$rho_iters --interpolate-weights"
fi
ssmoothing=""
if [ "$smooth_type" == "0" ]; then
ssmoothing=""
elif [ "$smooth_type" == "1" ]; then
ssmoothing="--preserve-counts"
else
echo "Invalid smoothing type $smooth_type"
exit 1
fi
mkdir -p $dir
feats="ark:apply-cmvn --norm-vars=false --utt2spk=ark:$data/utt2spk ark:$alidir/cmvn.ark scp:$data/feats.scp ark:- | add-deltas ark:- ark:- |"
# compute integer form of transcripts.
scripts/sym2int.pl --ignore-first-field $lang/words.txt < $data/text > $dir/train.tra \
|| exit 1;
echo "Accumulating tree stats"
acc-tree-stats --ci-phones=$silphonelist $alidir/final.mdl "$feats" \
ark:$alidir/ali $dir/treeacc 2> $dir/acc.tree.log || exit 1;
echo "Computing questions for tree clustering"
cat $lang/phones.txt | awk '{print $NF}' | grep -v -w 0 > $dir/phones.list
cluster-phones $dir/treeacc $dir/phones.list $dir/questions.txt 2> $dir/questions.log || exit 1;
scripts/int2sym.pl $lang/phones.txt < $dir/questions.txt > $dir/questions_syms.txt
compile-questions $lang/topo $dir/questions.txt $dir/questions.qst 2>$dir/compile_questions.log || exit 1;
# Have to make silence root not-shared because we will not split it.
scripts/make_roots.pl --separate $lang/phones.txt $silphonelist shared split \
> $dir/roots.txt 2>$dir/roots.log || exit 1;
# build the tree, but disable the post-clustering of the leaves by setting --cluster-thresh=0
echo "Building tree"
build-tree --verbose=1 --cluster-thresh=0 \
--max-leaves=$max_leaves \
$dir/treeacc $dir/roots.txt \
$dir/questions.qst $lang/topo $dir/tree 2> $dir/train_tree.log || exit 1;
# generate dummy tree.map to map all leaves to the single codebook
echo -n "[ " > $dir/tree.map
for i in `seq 1 $max_leaves`; do