Commit ce79c600 authored by Arnab Ghoshal
Browse files

Merging trunk/ r435 changes to sandbox/discrim/.

git-svn-id: https://svn.code.sf.net/p/kaldi/code/sandbox/discrim@437 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
parents b47a690a 4153b98b
......@@ -19,3 +19,8 @@ Explanations of the corpora are below:
Available from the LDC as catalog number LDC93S3A (it may be possible to
get the same data using combinations of other catalog numbers, but this
is the one we used).
swbd: Switchboard. A fairly large amount of telephone speech (2-channel, 8kHz
sampling rate).
This directory is a work in progress.
\ No newline at end of file
......@@ -28,6 +28,13 @@ exp/decode_tri1_latgen/wer_11:Average WER is 4.188941 (525 / 12533)
exp/decode_tri1_latgen/wer_12:Average WER is 4.420330 (554 / 12533)
exp/decode_tri1_latgen/wer_13:Average WER is 4.555972 (571 / 12533)
# Lattice oracle error rate for exp/decode_tri1_latgen/
# when acoustic scale is set to 10
Beam 0.01 Average WER is 4.085215 (512 / 12533)
Beam 0.5 Average WER is 3.702226 (464 / 12533)
Beam 1 Average WER is 3.279343 (411 / 12533)
Beam 5 Average WER is 1.412272 (177 / 12533)
Beam 10 Average WER is 0.582462 ( 73 / 12533)
# Results on a second pass of triphone system building--
# various configurations.
......
......@@ -59,7 +59,10 @@ steps/make_mfcc_test.sh $mfccdir
steps/train_mono.sh
steps/decode_mono.sh &
steps/train_tri1.sh
(steps/decode_tri1.sh; steps/decode_tri1_fmllr.sh; steps/decode_tri1_regtree_fmllr.sh ;steps/decode_tri1_latgen.sh) &
(steps/decode_tri1.sh; steps/decode_tri1_fmllr.sh; steps/decode_tri1_regtree_fmllr.sh ;steps/decode_tri1_latgen.sh; steps/decode_tri1_latoracle.sh) &
# putting here in case anyone needs ctm output.
scripts/make_ctms.sh exp/tri1 exp/decode_tri1
steps/train_tri2a.sh
(steps/decode_tri2a.sh ; steps/decode_tri2a_fmllr.sh; steps/decode_tri2a_fmllr_utt.sh ;
......@@ -129,7 +132,3 @@ steps/train_ubma.sh
(steps/train_ubmd.sh; steps/train_sgmme.sh; steps/decode_sgmme.sh; steps/decode_sgmme_fmllr.sh;
steps/decode_sgmme_latgen.sh )&
#!/bin/bash
# Copyright 2011 Microsoft Corporation1 Gilles Boulianne
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# Computes the lattice oracle WER for one set of lattices.
#
# Arguments:
#   $1  lattice rspecifier to score
#   $2  transcript text file (utterance id followed by words)
#   $3  output directory; all intermediate and result files are written here
#   $4  suffix used to name the outputs: reference.<param>.log,
#       oracle.<param>.log, oracle_<param>.tra and wer_<param>
#
# Must be run from a directory containing path.sh, data/words.txt and scripts/.
if [ $# != 4 ]; then
  echo "Usage: scripts/latoracle.sh <lattice-rspecifier> <transcript-text-file> <output-decode-dir> <param>"
  exit 1;
fi

. path.sh || exit 1;

inputlat=$1   # e.g. "ark:gunzip -c /pub/tmp/kaldi2011/dpovey/decode_tri1_latgen/test_sep92.lat.gz|"
transcript=$2 # e.g. data_prep/test_sep92_trans.txt
dir=$3        # e.g. exp/decode_tri1_latgen
param=$4      # output files will be given "param" suffix as in wer_${param}

mkdir -p "$dir" || exit 1;

# Create reference transcriptions and lattices.
# The <NOISE> and <SPOKEN_NOISE> tokens are stripped from the reference first.
sed -e 's:<NOISE>::g' -e 's:<SPOKEN_NOISE>::g' < "$transcript" > "$dir/test_trans.filt"

scripts/sym2int.pl --ignore-first-field data/words.txt < "$dir/test_trans.filt" | \
  string-to-lattice "ark:$dir/test_trans.lats" 2>"$dir/reference.${param}.log"

# Use the lattices named by the caller.  The rspecifier argument used to be
# ignored in favour of a hard-coded $dir/lats.pruned.gz.
lattice-oracle --word-symbol-table=data/words.txt \
  "ark:$dir/test_trans.lats" "$inputlat" "ark,t:$dir/oracle_${param}.tra" \
  2>"$dir/oracle.${param}.log"

# the ,p option lets it score partial output without dying..
scripts/int2sym.pl --ignore-first-field data/words.txt < "$dir/oracle_${param}.tra" | \
  sed -e 's:<s>::' -e 's:</s>::' -e 's:<UNK>::g' | \
  compute-wer --text --mode=present "ark:$dir/test_trans.filt" ark,p:- >& "$dir/wer_${param}"
#!/bin/bash
# Writes one CTM file per test set into <decode-dir>/ctm/.
#
# Arguments:
#   $1  source (model) directory; must contain final.mdl
#   $2  decode directory; test_<set>.ali and test_<set>.tra are read from it
#
# Also reads data/phones_disambig.txt, data/L_align.fst and data/words.txt
# relative to the current directory.
if [ $# != 2 ]; then
  echo "Usage: make_ctms.sh src-dir decode-dir"
  exit 1;
fi

model=$1/final.mdl
dir=$2

if [ ! -f "$model" ]; then
  echo "No such file $model";
  exit 1;
fi

# Integer ids of the #1 and #2 symbols, passed to phones-to-prons below.
# Match the symbol column exactly: a plain grep for "#1" would also hit
# "#10", "#11", ... and yield several ids.
wbegin=$(awk '$1 == "#1" {print $2}' data/phones_disambig.txt)
wend=$(awk '$1 == "#2" {print $2}' data/phones_disambig.txt)
if [ -z "$wbegin" ] || [ -z "$wend" ]; then
  echo "make_ctms.sh: could not find symbols #1 and #2 in data/phones_disambig.txt"
  exit 1;
fi

mkdir -p "$dir/ctm" || exit 1;

for test in mar87 oct87 feb89 oct89 feb91 sep92; do
  # wali_to_ctm.sh is given "-" so it reads the word alignments from the pipe.
  ali-to-phones "$model" "ark:$dir/test_${test}.ali" ark:- | \
    phones-to-prons data/L_align.fst "$wbegin" "$wend" ark:- "ark:$dir/test_${test}.tra" ark,t:- | \
    prons-to-wordali ark:- \
      "ark:ali-to-phones --write-lengths $model ark:$dir/test_${test}.ali ark:-|" ark,t:- | \
    scripts/wali_to_ctm.sh - data/words.txt > "$dir/ctm/test_${test}.ctm" || exit 1;
done
......@@ -88,7 +88,7 @@ if( $silprob == 0.0 ) { # No optional silences: just have one (loop+final) state
@A = split(" ", $_);
$w = shift @A;
if(@A == 0) { # For empty words (<s> and </s>) insert no optional
# silence (not needed as adjacent words supply it)....
# silence (not needed as adjacent words supply it)....
# actually we only hit this case for the lexicon without disambig
# symbols but doesn't ever matter as training transcripts don't have <s> or </s>.
print "$loopstate\t$loopstate\t<eps>\t$w\n";
......
#!/bin/bash
# Converts word alignments to CTM-style lines on stdout.
#
# Arguments:
#   $1  word-alignment file; it is handed to cat, so "-" reads stdin
#   $2  word symbol table, passed to scripts/int2sym.pl
if [ $# != 2 ]; then
echo "Usage: wali_to_ctm.sh word-alignments words-symbol-table > ctm" 1>&2
exit 1;
fi
wali=$1
symtab=$2
# Each input line is an utterance id followed by ";"-separated "word dur"
# pairs.  The perl program multiplies every duration by 0.01 and keeps a
# running start time; entries whose word id is 0 print nothing but still
# advance the time.  Each printed line is "utt 1 word time dur word", and
# int2sym.pl then maps field 6 (the second copy of the word id) to its symbol.
# presumably durations are frame counts at 10 ms per frame -- TODO confirm.
# NOTE(review): field 3 still carries the integer word id; confirm that
# consumers of this output expect that extra column.
cat $wali | \
perl -ane '@A = split(" "); $utt = shift @A; @A = split(";", join(" ", @A));
$time=0.0;
foreach $a (@A) {
($word,$dur) = split(" ", $a);
$dur *= 0.01;
if ($word != 0) {
print "$utt 1 $word $time $dur $word\n";
}
$time =$time + $dur;
} ' | scripts/int2sym.pl --field 6 $symtab
......@@ -43,3 +43,16 @@ wait
grep WER $dir/wer_* | \
awk '{n=n+$4; d=d+$6} END{ printf("Average WER is %f (%d / %d) \n", 100.0*n/d, n, d); }' \
> $dir/wer
# Example to show how to get the word alignments:
test=mar87
wbegin=`grep "#1" data/phones_disambig.txt | awk '{print $2}'`
wend=`grep "#2" data/phones_disambig.txt | awk '{print $2}'`
ali-to-phones $model ark:$dir/test_${test}.ali ark:- | \
phones-to-prons data/L_align.fst $wbegin $wend ark:- ark:$dir/test_${test}.tra ark,t:- | \
prons-to-wordali ark:- \
"ark:ali-to-phones --write-lengths $model ark:$dir/test_${test}.ali ark:-|" ark,t:$dir/test_${test}.wali
scripts/wali_to_ctm.sh $dir/test_${test}.wali data/words.txt > $dir/test_${test}.ctm
#!/bin/bash
# Copyright 2010-2011 Microsoft Corporation
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# To view the lattices, a suitable command (after running this) is:
# gunzip -c exp/decode_tri1_latgen/test_feb89.lat.gz | scripts/int2sym.pl --field 3 data/words.txt | less
# Prunes the lattices of every test set at several beams, scores each pruned
# set with scripts/latoracle.sh, then prints and stores the WER summed over
# the test sets for each beam.
#
# Optional argument: directory holding test_<set>.lat.gz.  It is honoured only
# when exactly one argument is given.

# Pick up the environment setup when it is present.
[ -f path.sh ] && . path.sh

prune_beams="0.01 0.5 1 5 10"
test_sets="mar87 oct87 feb89 oct89 feb91 sep92"

case $# in
  1) latdir=$1 ;;
  *) latdir=exp/decode_tri1_latgen ;;  # default value
esac

outdir=exp/decode_tri1_latoracle
mkdir -p "$outdir"

# The acoustic scale is the reciprocal of inv_acwt.
inv_acwt=10
acwt=$(perl -e "print (1.0/$inv_acwt);")

for tset in $test_sets; do
  inputlat="ark:gunzip -c $latdir/test_${tset}.lat.gz|"
  # try pruning beams
  for b in $prune_beams; do
    echo "Pruning lattices $inputlat with invacwt=$inv_acwt and beam=$b"
    # lats.pruned.gz and prune.<beam>.log are overwritten on every pass.
    lattice-prune --acoustic-scale="$acwt" --beam="$b" \
      "$inputlat" "ark,t:|gzip -c>$outdir/lats.pruned.gz" \
      2>"$outdir/prune.$b.log"
    scripts/latoracle.sh "ark:gunzip -c $outdir/lats.pruned.gz|" \
      "data_prep/test_${tset}_trans.txt" "$outdir" "${tset}_${b}"
  done
done

for b in $prune_beams; do
  printf 'Beam %s ' "$b"
  # Collect the per-test-set result files for this beam, in test-set order.
  wer_files=
  for tset in $test_sets; do
    wer_files="$wer_files $outdir/wer_${tset}_${b}"
  done
  # wer_files is expanded unquoted on purpose: one argument per file.
  grep WER $wer_files | \
    awk '{n=n+$4; d=d+$6} END{ printf("Average WER is %f (%d / %d) \n", 100.0*n/d, n, d); }' \
    | tee "$outdir/wer_${b}"
done
......@@ -21,6 +21,7 @@
# To be run from ..
if [ -f path.sh ]; then . path.sh; fi
mkdir -p data
cp data_prep/G.txt data/
scripts/make_words_symtab.pl < data/G.txt > data/words.txt
cp data_prep/lexicon.txt data/
......@@ -46,6 +47,9 @@ cat data_prep/train_trans.txt | \
# silprob = 0.5: same prob as word.
scripts/make_lexicon_fst.pl data/lexicon.txt 0.5 sil | fstcompile --isymbols=data/phones.txt --osymbols=data/words.txt --keep_isymbols=false --keep_osymbols=false | fstarcsort --sort_type=olabel > data/L.fst
cat data/lexicon.txt | awk '{printf("%s #1 ", $1); for (n=2; n <= NF; n++) { printf("%s ", $n); } print "#2"; }' | \
scripts/make_lexicon_fst.pl - 0.5 sil | fstcompile --isymbols=data/phones_disambig.txt --osymbols=data/words.txt --keep_isymbols=false --keep_osymbols=false | fstarcsort --sort_type=olabel > data/L_align.fst
scripts/make_lexicon_fst.pl data/lexicon_disambig.txt 0.5 sil '#'$ndisambig | fstcompile --isymbols=data/phones_disambig.txt --osymbols=data/words.txt --keep_isymbols=false --keep_osymbols=false | fstarcsort --sort_type=olabel > data/L_disambig.fst
fstcompile --isymbols=data/words.txt --osymbols=data/words.txt --keep_isymbols=false --keep_osymbols=false data/G.txt > data/G.fst
......
......@@ -69,13 +69,12 @@ steps/train_deltas.sh data/train data/lang exp/tri1_ali exp/tri2a
local/decode.sh steps/decode_deltas.sh exp/tri2a
# train tri2b [LDA+MLLT]
steps/train_lda_mllt.sh data/train data/train.1k data/lang exp/tri1_ali exp/tri2b
steps/train_lda_mllt.sh data/train data/lang exp/tri1_ali exp/tri2b
# decode tri2b
local/decode.sh steps/decode_lda_mllt.sh exp/tri2b
# Get per-speaker subset for ET; train and test ET.
scripts/subset_data_dir.sh --per-spk data/train 15 data/train.15utt
steps/train_lda_et.sh data/train data/train.15utt data/lang exp/tri1_ali exp/tri2c
# Train and test ET.
steps/train_lda_et.sh data/train data/lang exp/tri1_ali exp/tri2c
scripts/mkgraph.sh data/lang_test exp/tri2c exp/tri2c/graph
local/decode.sh steps/decode_lda_et.sh exp/tri2c
......@@ -83,6 +82,18 @@ local/decode.sh steps/decode_lda_et.sh exp/tri2c
steps/align_lda_mllt.sh --graphs "ark,s,cs:gunzip -c exp/tri2b/graphs.fsts.gz|" \
data/train data/lang exp/tri2b exp/tri2b_ali
steps/train_lda_mllt_sat.sh data/train data/lang exp/tri2b_ali exp/tri3d
scripts/mkgraph.sh data/lang_test exp/tri3d exp/tri3d/graph
local/decode.sh steps/decode_lda_mllt_sat.sh exp/tri3d
# Align all data with LDA+MLLT+SAT system (tri3d)
steps/align_lda_mllt_sat.sh --graphs "ark,s,cs:gunzip -c exp/tri3d/graphs.fsts.gz|" \
data/train data/lang exp/tri3d exp/tri3d_ali
# Try another pass on top of that.
steps/train_lda_mllt_sat.sh data/train data/lang exp/tri3d_ali exp/tri4d
scripts/mkgraph.sh data/lang_test exp/tri4d exp/tri4d/graph
local/decode.sh steps/decode_lda_mllt_sat.sh exp/tri4d
##### Below here is trash. ######
......
#!/bin/bash
# Copyright 2010-2011 Microsoft Corporation Arnab Ghoshal
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# To be run from ..
# This script does training-data alignment given a model built using CMN +
# splice-9-frames + LDA + MLLT features, plus fMLLR/CMLLR. Its output, all in
# its own experimental directory, is cmvn.ark, trans.ark, ali, tree, final.mdl
# and final.mat (the last three are just copied from the source directory).
# Option to use precompiled graphs from last phase, if these
# are available (i.e. if they were built with the same data).
# Optional leading "--graphs <rspecifier>": when given, the precompiled graphs
# are used with gmm-align-compiled instead of compiling from transcripts.
graphs=
if [ "$1" == --graphs ]; then
  shift;
  graphs=$1
  shift
fi

if [ $# != 4 ]; then
  echo "Usage: steps/align_lda_mllt_sat.sh <data-dir> <lang-dir> <src-dir> <exp-dir>"
  echo " e.g.: steps/align_lda_mllt_sat.sh data/train data/lang exp/tri3d exp/tri3d_ali"
  exit 1;
fi

if [ -f path.sh ]; then . path.sh; fi

data=$1    # provides feats.scp, spk2utt, utt2spk and text
lang=$2    # provides words.txt, L.fst and silphones.csl
srcdir=$3  # model directory to align with
dir=$4     # output directory

# Inputs that must exist in the source directory before anything is written.
for f in "$srcdir/final.mdl" "$srcdir/final.alimdl" "$srcdir/final.mat" "$srcdir/tree"; do
  if [ ! -f "$f" ]; then
    echo "align_lda_mllt_sat.sh: no such file $f"
    exit 1;
  fi
done

mkdir -p "$dir" || exit 1;
cp "$srcdir"/{final.mdl,final.alimdl,tree,final.mat} "$dir" || exit 1; # Create copies in $dir

# Expanded unquoted below on purpose, so each option is a separate argument.
scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1"

echo "Computing cepstral mean and variance statistics"
compute-cmvn-stats "--spk2utt=ark:$data/spk2utt" "scp:$data/feats.scp" \
  "ark:$dir/cmvn.ark" 2>"$dir/cmvn.log" || exit 1;

# Speaker-independent features: CMN, then frame splicing, then final.mat.
sifeats="ark:apply-cmvn --norm-vars=false --utt2spk=ark:$data/utt2spk ark:$dir/cmvn.ark scp:$data/feats.scp ark:- | splice-feats ark:- ark:- | transform-feats $dir/final.mat ark:- ark:- |"

# Align all training data using the supplied alignment model.
echo "Aligning all training data [with alignment model]"
if [ -z "$graphs" ]; then # --graphs option not supplied [-z means empty string]
  # compute integer form of transcripts.
  scripts/sym2int.pl --ignore-first-field "$lang/words.txt" < "$data/text" > "$dir/train.tra" \
    || exit 1;
  gmm-align $scale_opts --beam=8 --retry-beam=40 "$dir/tree" "$dir/final.alimdl" "$lang/L.fst" \
    "$sifeats" "ark:$dir/train.tra" "ark:$dir/pre.ali" 2> "$dir/align_pass1.log" || exit 1;
  rm "$dir/train.tra"
else
  gmm-align-compiled $scale_opts --beam=8 --retry-beam=40 "$dir/final.alimdl" \
    "$graphs" "$sifeats" "ark:$dir/pre.ali" 2> "$dir/align_pass1.log" || exit 1;
fi

echo "Computing fMLLR transforms"
silphonelist=$(cat "$lang/silphones.csl")
if [ -z "$silphonelist" ]; then
  echo "align_lda_mllt_sat.sh: empty or missing silence phone list $lang/silphones.csl"
  exit 1;
fi

# Estimate one transform per speaker from the first-pass alignments; the
# silence phones are given zero weight.
( ali-to-post "ark:$dir/pre.ali" ark:- | \
    weight-silence-post 0.0 "$silphonelist" "$dir/final.alimdl" ark:- ark:- | \
    gmm-post-to-gpost "$dir/final.alimdl" "$sifeats" ark:- ark:- | \
    gmm-est-fmllr-gpost "--spk2utt=ark:$data/spk2utt" "$dir/final.mdl" "$sifeats" ark:- "ark:$dir/trans.ark" ) \
  2>"$dir/trans.log" || exit 1;

# Final features: the speaker-independent pipeline plus the per-speaker transform.
feats="$sifeats transform-feats --utt2spk=ark:$data/utt2spk ark:$dir/trans.ark ark:- ark:- |"

echo "Aligning all training data [with final model and features]"
if [ -z "$graphs" ]; then # --graphs option not supplied [-z means empty string]
  # compute integer form of transcripts.
  scripts/sym2int.pl --ignore-first-field "$lang/words.txt" < "$data/text" > "$dir/train.tra" \
    || exit 1;
  gmm-align $scale_opts --beam=8 --retry-beam=40 "$dir/tree" "$dir/final.mdl" "$lang/L.fst" \
    "$feats" "ark:$dir/train.tra" "ark:$dir/ali" 2> "$dir/align_pass2.log" || exit 1;
  rm "$dir/train.tra"
else
  gmm-align-compiled $scale_opts --beam=8 --retry-beam=40 "$dir/final.mdl" \
    "$graphs" "$feats" "ark:$dir/ali" 2> "$dir/align_pass2.log" || exit 1;
fi

# The first-pass alignments are only an intermediate product.
rm "$dir/pre.ali"

echo "Done."
#!/bin/bash
# Copyright 2010-2011 Microsoft Corporation
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# Decoding script that works with a GMM model and cepstral
# mean subtraction plus splice-9-frames plus LDA + MLLT + SAT features.
# Two passes of decoding.
# Used, for example, to decode tri3d/.
# Arguments:
#   $1  model directory (final.mdl, final.alimdl, final.mat, graph/HCLG.fst)
#   $2  data directory (feats.scp, spk2utt, utt2spk, text)
#   $3  lang directory (words.txt, silphones.csl)
#   $4  decode directory; logs, transcripts, transforms and wer are written here
if [ $# != 4 ]; then
  echo "Usage: steps/decode_lda_mllt_sat.sh <model-dir> <data-dir> <lang-dir> <decode-dir>"
  echo " e.g.: steps/decode_lda_mllt_sat.sh exp/tri3d data/test_feb89 data/lang_test exp/tri3d/decode_feb89"
  exit 1;
fi

srcdir=$1
data=$2
lang=$3
dir=$4
graphdir=$srcdir/graph

# Fail early with a message if the silence phone list is missing or empty.
silphonelist=$(cat "$lang/silphones.csl")
if [ -z "$silphonelist" ]; then
  echo "decode_lda_mllt_sat.sh: empty or missing silence phone list $lang/silphones.csl"
  exit 1;
fi

mkdir -p "$dir" || exit 1;

if [ -f path.sh ]; then . path.sh; fi

for f in "$srcdir/final.mdl" "$srcdir/final.alimdl" "$srcdir/final.mat"; do
  if [ ! -f "$f" ]; then
    echo "decode_lda_mllt_sat.sh: input file $f does not exist";
    exit 1;
  fi
done

# -f means file exists; -ot means "older than".  The graph must exist and
# must not be older than the model.
if [ ! -f "$graphdir/HCLG.fst" ] || [ "$graphdir/HCLG.fst" -ot "$srcdir/final.mdl" ]; then
  echo "Graph $graphdir/HCLG.fst does not exist or is too old."
  exit 1;
fi

# Compute CMVN stats.
compute-cmvn-stats "--spk2utt=ark:$data/spk2utt" "scp:$data/feats.scp" "ark,t:$dir/cmvn.ark" \
  2>"$dir/cmvn.log" || exit 1;

# Speaker-independent features: CMN, then frame splicing, then final.mat.
sifeats="ark:apply-cmvn --norm-vars=false --utt2spk=ark:$data/utt2spk ark:$dir/cmvn.ark scp:$data/feats.scp ark:- | splice-feats ark:- ark:- | transform-feats $srcdir/final.mat ark:- ark:- |"

# For Resource Management, we use beam of 30 and acwt of 1/7.
# More normal, LVCSR setups would have a beam of 13 and acwt of 1/15 or so.
# If you decode with a beam of 20 on an LVCSR setup it will be very slow.

# First pass: decode with the alignment model on speaker-independent features.
gmm-decode-faster --beam=30.0 --acoustic-scale=0.1429 "--word-symbol-table=$lang/words.txt" \
  "$srcdir/final.alimdl" "$graphdir/HCLG.fst" "$sifeats" "ark,t:$dir/pass1.tra" "ark,t:$dir/pass1.ali" \
  2> "$dir/decode_pass1.log" || exit 1;

# Estimate one transform per speaker from the first-pass alignments; the
# silence phones are given zero weight.
( ali-to-post "ark:$dir/pass1.ali" ark:- | \
    weight-silence-post 0.0 "$silphonelist" "$srcdir/final.alimdl" ark:- ark:- | \
    gmm-post-to-gpost "$srcdir/final.alimdl" "$sifeats" ark:- ark:- | \
    gmm-est-fmllr-gpost "--spk2utt=ark:$data/spk2utt" "$srcdir/final.mdl" "$sifeats" \
      ark,s,cs:- "ark:$dir/trans.ark" ) \
  2> "$dir/trans.log" || exit 1;

# Second-pass features: the speaker-independent pipeline plus the per-speaker transform.
feats="$sifeats transform-feats --utt2spk=ark:$data/utt2spk ark:$dir/trans.ark ark:- ark:- |"

# Second pass decoding...
gmm-decode-faster --beam=30.0 --acoustic-scale=0.1429 "--word-symbol-table=$lang/words.txt" \
  "$srcdir/final.mdl" "$graphdir/HCLG.fst" "$feats" "ark,t:$dir/pass2.tra" "ark,t:$dir/pass2.ali" \
  2> "$dir/decode_pass2.log" || exit 1;

# In this setup there are no non-scored words, so
# scoring is simple.
# the ,p option lets it score partial output without dying..
scripts/sym2int.pl --ignore-first-field "$lang/words.txt" "$data/text" | \
  compute-wer --mode=present ark:- "ark,p:$dir/pass2.tra" >& "$dir/wer"
......@@ -90,7 +90,7 @@ gmm-init-model --write-occs=$dir/1.occs \
gmm-mixup --mix-up=$numgauss $dir/1.mdl $dir/1.occs $dir/1.mdl \
2>$dir/mixup.log || exit 1;
rm $dir/treeacc
#rm $dir/treeacc
# Convert alignments generated from monophone model, to use as initial alignments.
......
......@@ -20,25 +20,19 @@
# exp/tri1), supplied as an argument, which is assumed to be built using
# cepstral mean subtraction plus delta features.
if [ $# != 5 ]; then
echo "Usage: steps/train_lda_mllt.sh <data-dir> <data-subset-dir> <lang-dir> <ali-dir> <exp-dir>"
echo " e.g.: steps/train_lda_mllt.sh data/train data/train.15utt data/lang exp/tri1_ali exp/tri2c"
if [ $# != 4 ]; then
echo "Usage: steps/train_lda_et.sh <data-dir> <lang-dir> <ali-dir> <exp-dir>"
echo " e.g.: steps/train_lda_et.sh data/train data/lang exp/tri1_ali exp/tri2c"
exit 1;
fi
if [ -f path.sh ]; then . path.sh; fi
data=$1
datasub=$2
lang=$3
alidir=$4
dir=$5
lang=$2
alidir=$3
dir=$4
# Make sure datasub is a subset of data.
scripts/is_subset_scp.pl $datasub/feats.scp $data/feats.scp || exit 1;
# Make sure datasub doesn't have missing speakers vs. data.
scripts/is_subset_scp.pl $data/spk2utt $datasub/spk2utt || exit 1;
numiters_et=15 # Before this, update et parameters.
normtype=offset # et option; could be offset [recommended], or none
......@@ -55,16 +49,14 @@ numgauss=$[$numleaves + $numleaves/2]; # starting num-Gauss.
# up to final amount.
totgauss=9000 # Target #Gaussians
incgauss=$[($totgauss-$numgauss)/$maxiterinc] # per-iter increment for #Gauss
randprune=4.0
mkdir -p $dir $dir/warps
# This variable gets overwritten in this script.
basefeats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$data/utt2spk ark:$alidir/cmvn.ark scp:$data/feats.scp ark:- | splice-feats ark:- ark:- | transform-feats $dir/lda.mat ark:- ark:- |"
feats="$basefeats"
basefeatsub="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$datasub/utt2spk ark:$alidir/cmvn.ark scp:$datasub/feats.scp ark:- | splice-feats ark:- ark:- | transform-feats $dir/lda.mat ark:- ark:- |"
featsub="$basefeatsub"
splicedfeatsub="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$datasub/utt2spk ark:$alidir/cmvn.ark scp:$datasub/feats.scp ark:- | splice-feats ark:- ark:- |"
splicedfeats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$data/utt2spk ark:$alidir/cmvn.ark scp:$data/feats.scp ark:- | splice-feats ark:- ark:- |"
# compute integer form of transcripts.
scripts/sym2int.pl --ignore-first-field $lang/words.txt < $data/text > $dir/train.tra \
......@@ -74,7 +66,7 @@ echo "Accumulating LDA statistics."
( ali-to-post ark:$alidir/ali ark:- | \
weight-silence-post 0.0 $silphonelist $alidir/final.mdl ark:- ark:- | \
acc-lda $alidir/final.mdl "$splicedfeatsub" ark,s,cs:- $dir/lda.acc ) \
acc-lda --rand-prune=$randprune $alidir/final.mdl "$splicedfeats" ark,s,cs:- $dir/lda.acc ) \
2>$dir/lda_acc.log
est-lda $dir/lda.mat $dir/lda.acc 2>$dir/lda_est.log
......@@ -138,8 +130,9 @@ while [ $x -lt $numiters ]; do
echo "Re-estimating ET transforms"
( ali-to-post ark:$dir/cur.ali ark:- | \
weight-silence-post 0.0 $silphonelist $dir/$x.mdl ark:- ark:- | \
gmm-post-to-gpost $dir/$x.mdl "$featsub" ark:- ark:- | \
gmm-est-et --spk2utt=ark:$datasub/spk2utt $dir/$x.mdl $dir/$x.et "$basefeatsub" \
rand-prune-post $randprune ark:- ark:- | \
gmm-post-to-gpost $dir/$x.mdl "$feats" ark:- ark:- | \
gmm-est-et --spk2utt=ark:$data/spk2utt $dir/$x.mdl $dir/$x.et "$basefeats" \
ark,s,cs:- ark:$dir/$x.trans ark,t:$dir/warps/$x.warp ) \
2> $dir/trans.$x.log || exit 1;
......@@ -147,7 +140,6 @@ while [ $x -lt $numiters ]; do
if [ $x -gt 1 ]; then rm $dir/$[$x-1].trans; fi
# Set features to include transform.
feats="$basefeats transform-feats --utt2spk=ark:$data/utt2spk ark:$dir/$x.trans ark:- ark:- |"
featsub="$basefeatsub transform-feats --utt2spk=ark:$data/utt2spk ark:$dir/$x.trans ark:- ark:- |"
fi
gmm-acc-stats-ali --binary=false $dir/$x.mdl "$feats" ark:$dir/cur.ali $dir/$x.acc 2> $dir/acc.$x.log || exit 1;
......@@ -161,15 +153,17 @@ while [ $x -lt $numiters ]; do
if [ $[$x%2] == 0 ]; then # Estimate A:
( ali-to-post ark:$dir/cur.ali ark:- | \
weight-silence-post 0.0 $silphonelist $dir/$x1.mdl ark:- ark:- | \
gmm-post-to-gpost $dir/$x1.mdl "$featsub" ark:- ark:- | \
gmm-et-acc-a $spk2utt_opt --verbose=1 $dir/$x1.mdl $dir/$x.et "$basefeatsub" \
rand-prune-post $randprune ark:- ark:- | \
gmm-post-to-gpost $dir/$x1.mdl "$feats" ark:- ark:- | \
gmm-et-acc-a --spk2utt=ark:$data/spk2utt --verbose=1 $dir/$x1.mdl $dir/$x.et "$basefeats" \
ark,s,cs:- $dir/$x.et_acc_a ) 2> $dir/acc_a.$x.log || exit 1;
gmm-et-est-a --verbose=1 $dir/$x.et $dir/$x1.et $dir/$x.et_acc_a 2> $dir/update_a.$x.log || exit 1;
rm $dir/$x.et_acc_a
else
( ali-to-post ark:$dir/cur.ali ark:- | \
weight-silence-post 0.0 $silphonelist $dir/$x1.mdl ark:- ark:- | \
gmm-acc-mllt $dir/$x1.mdl "$featsub" ark:- $dir/$x.mllt_acc ) 2> $dir/acc_b.$x.log || exit 1;
gmm-acc-mllt --rand-prune=$randprune $dir/$x1.mdl "$feats" ark:- \
$dir/$x.mllt_acc ) 2> $dir/acc_b.$x.log || exit 1;
est-mllt $dir/$x.mat $dir/$x.mllt_acc 2> $dir/update_b.$x.log || exit 1;
gmm-et-apply-c $dir/$x.et $dir/$x.mat $dir/$x1.et 2>>$dir/update_b.$x.log || exit 1;
gmm-transform-means $dir/$x.mat $dir/$x1.mdl $dir/$x1.mdl 2>> $dir/update_b.$x.log || exit 1;
......@@ -206,7 +200,7 @@ defaultfeats="$basefeats transform-feats $dir/B.mat ark:- ark:- |"
( ali-to-post ark:$dir/cur.ali ark:- | \
gmm-acc-stats-twofeats $dir/$x.mdl "$feats" "$defaultfeats" ark:- $dir/$x.acc2 ) 2>$dir/acc_alimdl.log || exit 1;
# Update model.
gmm-est --write-occs=$dir/final.occs --remove-low-count-gaussians=false $dir/$x.mdl $dir/$x.acc2 $dir/$x.alimdl \