train_lda_etc_mmi.sh 6.05 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28
#!/bin/bash
# Copyright 2010-2011 Microsoft Corporation

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#  http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.

# To be run from ..
# This directory does MMI model training, starting from trained
# models.  The models must be trained on raw features plus
# cepstral mean normalization plus splice-9-frames, an LDA+[something] 
# transform, then possibly speaker-specific affine transforms 
# (fMLLR/CMLLR).   This script works out from the alignment directory
# whether you trained with some kind of speaker-specific transform.
#
# This training run starts from an initial directory that has
# alignments, models and transforms from an LDA+MLLT system:
#  ali, final.mdl, final.mat

29 30 31 32 33 34
b=0 # boosting constant, for boosted MMI. 
if [ $1 == "--boost" ]; then # e.g. "--boost 0.05"
   shift;
   b=$1;
   shift;
fi
35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53

if [ $# != 4 ]; then
   echo "Usage: steps/train_lda_etc_mmi.sh <data-dir> <lang-dir> <ali-dir> <exp-dir>"
   echo " e.g.: steps/train_lda_etc_mmi.sh data/train data/lang exp/tri3d_ali exp/tri4a"
   exit 1;
fi

if [ -f path.sh ]; then . path.sh; fi

data=$1
lang=$2
alidir=$3
dir=$4

num_iters=4
acwt=0.1
beam=20
latticebeam=10
scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1"
54
silphonelist=`cat $lang/silphones.csl`
55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95

mkdir -p $dir
cp $alidir/tree $alidir/final.mat $dir # Will use the same tree and transforms as in the baseline.
cp $alidir/final.mdl $dir/0.mdl

if [ -f $alidir/final.alimdl ]; then
   cp $alidir/final.alimdl $dir/final.alimdl
   cp $alidir/final.mdl $dir/final.adaptmdl # This model used by decoding scripts,
   # when you don't want to compute fMLLR transforms with the MMI-trained model.
fi

scripts/split_scp.pl $data/feats.scp $dir/feats{0,1,2,3}.scp

feats="ark:apply-cmvn --norm-vars=false --utt2spk=ark:$data/utt2spk ark:$alidir/cmvn.ark scp:$data/feats.scp ark:- | splice-feats ark:- ark:- | transform-feats $dir/final.mat ark:- ark:- |"
for n in 0 1 2 3; do
  featspart[$n]="ark:apply-cmvn --norm-vars=false --utt2spk=ark:$data/utt2spk ark:$alidir/cmvn.ark scp:$dir/feats$n.scp ark:- | splice-feats ark:- ark:- | transform-feats $dir/final.mat ark:- ark:- |"
done

if [ -f $alidir/trans.ark ]; then
   echo "Running with speaker transforms $alidir/trans.ark"
   feats="$feats transform-feats --utt2spk=ark:$data/utt2spk ark:$alidir/trans.ark ark:- ark:- |"
   for n in 0 1 2 3; do
     featspart[$n]="${featspart[$n]} transform-feats --utt2spk=ark:$data/utt2spk ark:$alidir/trans.ark ark:- ark:- |"
   done
fi

# compute integer form of transcripts.
scripts/sym2int.pl --ignore-first-field $lang/words.txt < $data/text > $dir/train.tra \
  || exit 1;

cp -r $lang $dir/lang

# Compute grammar FST which corresponds to unigram decoding graph.
cat $dir/train.tra | awk '{for(n=2;n<=NF;n++){ printf("%s ", $n); } printf("\n"); }' | \
 scripts/make_unigram_grammar.pl | fstcompile > $dir/lang/G.fst \
  || exit 1;

# mkgraph.sh expects a whole directory "lang", so put everything in one directory...
# it gets L_disambig.fst and G.fst (among other things) from $dir/lang, and 
# final.mdl from $alidir; the output HCLG.fst goes in $dir/graph.

Dan Povey's avatar
Dan Povey committed
96
scripts/mkgraph.sh $dir/lang $alidir $dir/dgraph || exit 1;
97 98 99 100 101 102 103 104

echo "Making denominator lattices"


rm $dir/.error 2>/dev/null
for n in 0 1 2 3; do
   gmm-latgen-simple --beam=$beam --lattice-beam=$latticebeam --acoustic-scale=$acwt \
    --word-symbol-table=$lang/words.txt \
105 106 107
    $alidir/final.mdl $dir/dgraph/HCLG.fst "${featspart[$n]}" \
    "ark:|lattice-boost-ali --b=$b --silence-phones=$silphonelist $alidir/final.mdl ark:- ark,s,cs:$alidir/ali ark:- | gzip -c >$dir/lat$n.gz" \
      2>$dir/decode_den.$n.log || touch $dir/.error &
108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148
done
wait
if [ -f $dir/.error ]; then
   echo "Error creating denominator lattices"
   exit 1;
fi

# No need to create "numerator" alignments/lattices: we just use the 
# alignments in $alidir.

echo "Note: ignore absolute offsets in the objective function values"
echo "This is caused by not having LM, lexicon or transition-probs in numerator"

x=0;
while [ $x -lt $num_iters ]; do
  echo "Iteration $x: getting denominator stats."
  # Get denominator stats...
  if [ $x -eq 0 ]; then
    ( lattice-to-post --acoustic-scale=$acwt "ark:gunzip -c $dir/lat?.gz|" ark:- | \
      gmm-acc-stats $dir/$x.mdl "$feats" ark:- $dir/den_acc.$x.acc ) \
     2>$dir/acc_den.$x.log || exit 1;
  else # Need to recompute acoustic likelihoods...
   ( gmm-rescore-lattice $dir/$x.mdl "ark:gunzip -c $dir/lat?.gz|" "$feats" ark:- | \
      lattice-to-post --acoustic-scale=$acwt ark:- ark:- | \
      gmm-acc-stats $dir/$x.mdl "$feats" ark:- $dir/den_acc.$x.acc ) \
     2>$dir/acc_den.$x.log || exit 1;
  fi
  echo "Iteration $x: getting numerator stats."
  # Get numerator stats...
  gmm-acc-stats-ali $dir/$x.mdl "$feats" ark:$alidir/ali $dir/num_acc.$x.acc \
   2>$dir/acc_num.$x.log || exit 1;
  # Update.
  gmm-est-mmi $dir/$x.mdl $dir/num_acc.$x.acc $dir/den_acc.$x.acc $dir/$[$x+1].mdl \
    2>$dir/update.$x.log || exit 1;

  den=`grep Overall $dir/acc_den.$x.log  | grep lattice-to-post | awk '{print $7}'`
  num=`grep Overall $dir/acc_num.$x.log  | grep gmm-acc-stats-ali | awk '{print $11}'`
  diff=`perl -e "print ($num * $acwt - $den);"`
  impr=`grep Overall $dir/update.$x.log | awk '{print $10;}'`
  impr=`perl -e "print ($impr * $acwt);"` # auxf impr normalized by multiplying by
  # kappa, so it's comparable to an objective-function change.
Dan Povey's avatar
Dan Povey committed
149
  echo On iter $x, objf was $diff, auxf improvement was $impr | tee $dir/objf.$x.log
150 151 152 153 154 155 156 157 158 159 160

  x=$[$x+1]
done

# Just copy the source-dir's occs, in case we later need them for something...
cp $alidir/final.occs $dir
( cd $dir; ln -s $x.mdl final.mdl )


echo Done