Commit 3cf883ef authored by Karel Vesely's avatar Karel Vesely
Browse files

sync with trunk



git-svn-id: https://svn.code.sf.net/p/kaldi/code/sandbox/karel@754 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
parent 58b2d951
......@@ -19,3 +19,8 @@ Explanations of the corpora are below:
Available from the LDC as catalog number LDC93S3A (it may be possible to
get the same data using combinations of other catalog numbers, but this
is the one we used).
swbd: Switchboard. A fairly large amount of telephone speech (2-channel, 8kHz
sampling rate).
This directory is a work in progress.
\ No newline at end of file
......@@ -7,15 +7,21 @@ About the Resource Management corpus:
is the one we used).
Each subdirectory of this directory contains the
scripts for a sequence of experiments.
scripts for a sequence of experiments. Note: s3 is the "default" set of
scripts at the moment.
s1: This setup contains experiments with GMM-based systems using various
Maximum Likelihood
techniques, including global and speaker-specific transforms.
See a parallel setup in ../wsj/s1
This setup is now slightly deprecated: probably you should look
at the s3 recipes.
s2: This setup contains experiments with a pure hybrid system.
s3: This is not finished yet; it contains some preliminary work
for some cleaned-up versions of scripts.
s3: These are the "new-style" recipes. We recommend looking here first for
RM recipes.
However, the WSJ or Switchboard s3/ recipes are probably a better
place to look if you're trying to set up something on your own
data, because they're more configurable.
......@@ -28,6 +28,13 @@ exp/decode_tri1_latgen/wer_11:Average WER is 4.188941 (525 / 12533)
exp/decode_tri1_latgen/wer_12:Average WER is 4.420330 (554 / 12533)
exp/decode_tri1_latgen/wer_13:Average WER is 4.555972 (571 / 12533)
# Lattice oracle error rate for exp/decode_tri1_latgen/
# when the inverse acoustic scale is set to 10 (i.e. acoustic scale 1/10)
Beam 0.01 Average WER is 4.085215 (512 / 12533)
Beam 0.5 Average WER is 3.702226 (464 / 12533)
Beam 1 Average WER is 3.279343 (411 / 12533)
Beam 5 Average WER is 1.412272 (177 / 12533)
Beam 10 Average WER is 0.582462 ( 73 / 12533)
# Results on a second pass of triphone system building--
# various configurations.
......@@ -79,32 +86,29 @@ exp/decode_tri2m_vtln_diag/wer:Average WER is 3.087848 (387 / 12533) # + diagon
exp/decode_tri2m_vtln_diag_utt/wer:Average WER is 4.340541 (544 / 12533) # [per-utterance]
exp/decode_tri2m_vtln_nofmllr/wer:Average WER is 5.784728 (725 / 12533) # feature-space VTLN, with no fMLLR
# sgmma is SGMM without speaker vectors.
exp/decode_sgmma/wer:Average WER is 3.319237 (416 / 12533)
exp/decode_sgmma_fmllr/wer:Average WER is 2.934308 (289 / 9849)
exp/decode_sgmma_fmllr/wer:Average WER is 2.928269 (367 / 12533)
exp/decode_sgmma_fmllr_utt/wer:Average WER is 3.303279 (414 / 12533)
exp/decode_sgmma_fmllrbasis_utt/wer:Average WER is 3.191574 (400 / 12533)
# sgmmb is SGMM with speaker vectors.
exp/decode_sgmmb/wer:Average WER is 2.760712 (346 / 12533)
exp/decode_sgmmb_fmllr/wer:Average WER is 2.585175 (324 / 12533)
exp/decode_sgmmb_utt/wer:Average WER is 2.808585 (352 / 12533)
exp/decode_sgmmb/wer:Average WER is 2.521344 (316 / 12533)
exp/decode_sgmmb_fmllr/wer:Average WER is 2.377723 (298 / 12533)
exp/decode_sgmmb_utt/wer:Average WER is 2.728796 (342 / 12533)
# sgmmc is like sgmmb but with gender dependency
exp/decode_sgmmc/wer:Average WER is 2.696880 (338 / 12533)
exp/decode_sgmmc_fmllr/wer:Average WER is 2.457512 (308 / 12533)
# "norm" is normalizing weights per gender..
exp/decode_sgmmc_norm/wer:Average WER is 2.696880 (338 / 12533)
exp/decode_sgmmc_fmllr_norm/wer:Average WER is 2.425596 (304 / 12533)
exp/decode_sgmmc/wer:Average WER is 2.720817 (341 / 12533)
exp/decode_sgmmc_fmllr/wer:Average WER is 2.489428 (312 / 12533)
# sgmmd is like sgmmb but with LDA+MLLT features.
exp/decode_sgmmd/wer:Average WER is 2.449533 (307 / 12533)
exp/decode_sgmmd_fmllr/wer:Average WER is 2.305912 (289 / 12533)
exp/decode_sgmmd/wer:Average WER is 2.656986 (333 / 12533)
exp/decode_sgmmd_fmllr/wer:Average WER is 2.409639 (302 / 12533)
# sgmme is like sgmmb but with LDA+ET features.
exp/decode_sgmme/wer:Average WER is 2.321870 (291 / 12533)
exp/decode_sgmme_fmllr/wer:Average WER is 2.154313 (270 / 12533)
exp/decode_sgmme/wer:Average WER is 2.337828 (293 / 12533)
exp/decode_sgmme_fmllr/wer:Average WER is 2.266018 (284 / 12533)
#### Note: stuff below this line may be out of date / not computed
......@@ -160,3 +164,4 @@ exp/decode_sgmmc_fmllr/wer:Average WER is 2.688901 (337 / 12533)
# 64-bit+ATLAS was 0.171s
# 32-bit+ATLAS was 0.205s
# 64-bit+MKL was 0.291s
......@@ -59,7 +59,10 @@ steps/make_mfcc_test.sh $mfccdir
steps/train_mono.sh
steps/decode_mono.sh &
steps/train_tri1.sh
(steps/decode_tri1.sh ; steps/decode_tri1_fmllr.sh; steps/decode_tri1_regtree_fmllr.sh ; steps/decode_tri1_latgen.sh) &
(steps/decode_tri1.sh; steps/decode_tri1_fmllr.sh; steps/decode_tri1_regtree_fmllr.sh ;steps/decode_tri1_latgen.sh; steps/decode_tri1_latoracle.sh) &
# putting here in case anyone needs ctm output.
scripts/make_ctms.sh exp/tri1 exp/decode_tri1
steps/train_tri2a.sh
(steps/decode_tri2a.sh ; steps/decode_tri2a_fmllr.sh; steps/decode_tri2a_fmllr_utt.sh ;
......@@ -129,7 +132,3 @@ steps/train_ubma.sh
(steps/train_ubmd.sh; steps/train_sgmme.sh; steps/decode_sgmme.sh; steps/decode_sgmme_fmllr.sh;
steps/decode_sgmme_latgen.sh )&
#!/bin/bash
# Copyright 2011 Microsoft Corporation1 Gilles Boulianne
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.

# Runs lattice-oracle on a set of lattices against reference transcripts and
# scores the result with compute-wer.
#
# Arguments:
#   $1  lattice rspecifier to read the lattices from
#   $2  reference transcript text file
#   $3  output directory (created if missing)
#   $4  suffix used in the names of the output files
# Files written to the output directory:
#   test_trans.filt     transcript with <NOISE> and <SPOKEN_NOISE> removed
#   oracle_<param>.tra  text-mode output archive of lattice-oracle
#   oracle.<param>.log  stderr of the lattice-oracle pipeline
#   wer_<param>         stdout+stderr of compute-wer
# Expects to be run from the recipe directory: it sources path.sh and reads
# data/words.txt.

if [ $# != 4 ]; then
  echo "Usage: scripts/latoracle.sh <lattice-rspecifier> <transcript-text-file> <output-decode-dir> <param>"
  exit 1;
fi

. path.sh || exit 1;

inputlat=$1   # e.g. "ark:gunzip -c /pub/tmp/kaldi2011/dpovey/decode_tri1_latgen/test_sep92.lat.gz|"
transcript=$2 # e.g. data_prep/test_sep92_trans.txt
dir=$3        # e.g. exp/decode_tri1_latgen
param=$4      # output files will be given "param" suffix as in wer_${param}

mkdir -p "$dir"

# Create reference transcriptions and lattices
cat "$transcript" | sed 's:<NOISE>::g' | sed 's:<SPOKEN_NOISE>::g' > "$dir/test_trans.filt"

# The integerised reference is piped in as "ark:-"; the lattices come from the
# rspecifier given by the caller (previously a fixed $dir/lats.pruned.gz was
# read here and the first argument was silently ignored).
( cat "$dir/test_trans.filt" | \
    scripts/sym2int.pl --ignore-first-field data/words.txt | \
    lattice-oracle --word-symbol-table=data/words.txt \
      "$inputlat" ark:- "ark,t:$dir/oracle_${param}.tra" ) \
  2> "$dir/oracle.${param}.log"

# Map the oracle word ids back to symbols, strip <s>, </s> and <UNK>, and score.
# the ,p option lets it score partial output without dying..
cat "$dir/oracle_${param}.tra" | \
  scripts/int2sym.pl --ignore-first-field data/words.txt | \
  sed 's:<s>::' | sed 's:</s>::' | sed 's:<UNK>::g' | \
  compute-wer --text --mode=present "ark:$dir/test_trans.filt" ark,p:- >& "$dir/wer_${param}"
#!/bin/bash

# Writes one CTM file per RM test set from the alignments and transcriptions
# already present in a decode directory.
#
# Arguments:
#   $1  src-dir: directory containing final.mdl
#   $2  decode-dir: directory containing test_<set>.ali and test_<set>.tra;
#       output goes to <decode-dir>/ctm/test_<set>.ctm
# Reads data/phones_disambig.txt, data/L_align.fst and data/words.txt, so it
# must be run from the recipe directory.

if [ $# != 2 ]; then
  echo "Usage: make_ctms.sh src-dir decode-dir"
  exit 1;
fi

model=$1/final.mdl
dir=$2

if [ ! -f "$model" ]; then
  echo "No such file $model";
  exit 1;
fi

# Integer ids of the "#1" and "#2" symbols, passed to phones-to-prons below.
# Match the first column exactly: a plain grep for "#1" would also pick up
# "#10", "#11", ... and hand several ids to the command line.
wbegin=$(awk '$1 == "#1" {print $2}' data/phones_disambig.txt)
wend=$(awk '$1 == "#2" {print $2}' data/phones_disambig.txt)
if [ -z "$wbegin" ] || [ -z "$wend" ]; then
  echo "Could not find symbols #1 and #2 in data/phones_disambig.txt";
  exit 1;
fi

mkdir -p "$dir/ctm"

for test in mar87 oct87 feb89 oct89 feb91 sep92; do
  # alignment -> phones -> pronunciations -> word alignment -> CTM.
  # Note: "|| exit 1" only sees the status of the last pipeline stage.
  ali-to-phones "$model" "ark:$dir/test_${test}.ali" ark:- | \
    phones-to-prons data/L_align.fst "$wbegin" "$wend" ark:- "ark:$dir/test_${test}.tra" ark,t:- | \
    prons-to-wordali ark:- \
      "ark:ali-to-phones --write-lengths $model ark:$dir/test_${test}.ali ark:-|" ark,t:- | \
    scripts/wali_to_ctm.sh - data/words.txt > "$dir/ctm/test_${test}.ctm" || exit 1;
done
......@@ -88,7 +88,7 @@ if( $silprob == 0.0 ) { # No optional silences: just have one (loop+final) state
@A = split(" ", $_);
$w = shift @A;
if(@A == 0) { # For empty words (<s> and </s>) insert no optional
# silence (not needed as adjacent words supply it)....
# silence (not needed as adjacent words supply it)....
# actually we only hit this case for the lexicon without disambig
# symbols but doesn't ever matter as training transcripts don't have <s> or </s>.
print "$loopstate\t$loopstate\t<eps>\t$w\n";
......
#!/bin/bash
# Copyright 2010-2011 Microsoft Corporation
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.

# Builds a decoding graph in <graphdir> from data/L_disambig.fst and
# data/G_UN.fst, going through the intermediate files LG.fst, CLG.fst,
# CLG2.fst, Ha.fst and HCLGa.fst and ending with HCLG.fst.
#
# Options (accepted in either order, before the positional arguments):
#   --mono        pass "--context-size=1 --central-position=0" to fstcomposecontext
#   --noreorder   pass --reorder=false to add-self-loops
# Arguments:
#   $1  tree
#   $2  model
#   $3  graphdir (created if missing)

reorder=true # Dan-style, make false for Mirko+Lukas's decoder.

# Several passes so that both options are recognised regardless of order.
# "$1" is quoted so the tests stay valid when no arguments are left.
for x in 1 2 3; do
  if [ "$1" == "--mono" ]; then
    monophone_opts="--context-size=1 --central-position=0"
    shift;
  fi
  if [ "$1" == "--noreorder" ]; then
    reorder=false # we set this for the Kaldi decoder.
    shift;
  fi
done

if [ $# != 3 ]; then
  echo "Usage: scripts/mkgraph.sh <tree> <model> <graphdir>"
  exit 1;
fi

if [ -f path.sh ]; then . path.sh; fi

tree=$1
model=$2
dir=$3

mkdir -p "$dir"

tscale=1.0
loopscale=0.1

# --- LG: lexicon composed with the grammar ---
fsttablecompose data/L_disambig.fst data/G_UN.fst | fstdeterminizestar --use-log=true | \
  fstminimizeencoded > "$dir/LG.fst"

fstisstochastic "$dir/LG.fst" || echo "warning: LG not stochastic."

echo "Example string from LG.fst: "
echo
fstrandgen --select=log_prob "$dir/LG.fst" | fstprint --isymbols=data/phones_disambig.txt --osymbols=data/words.txt -

# Ids of all symbols whose line in phones_disambig.txt contains '#'.
grep '#' data/phones_disambig.txt | awk '{print $2}' > "$dir/disambig_phones.list"

# --- CLG: add phonetic context ---
# $monophone_opts is intentionally unquoted: it holds zero or two options.
fstcomposecontext $monophone_opts \
  --read-disambig-syms="$dir/disambig_phones.list" \
  --write-disambig-syms="$dir/disambig_ilabels.list" \
  "$dir/ilabels" < "$dir/LG.fst" > "$dir/CLG.fst"

# for debugging:
fstmakecontextsyms data/phones.txt "$dir/ilabels" > "$dir/context_syms.txt"
echo "Example string from CLG.fst: "
echo
fstrandgen --select=log_prob "$dir/CLG.fst" | fstprint --isymbols="$dir/context_syms.txt" --osymbols=data/words.txt -

fstisstochastic "$dir/CLG.fst" || echo "warning: CLG not stochastic."

make-ilabel-transducer --write-disambig-syms="$dir/disambig_ilabels_remapped.list" "$dir/ilabels" "$tree" "$model" "$dir/ilabels.remapped" > "$dir/ilabel_map.fst"

# Reduce size of CLG by remapping symbols...
fsttablecompose "$dir/ilabel_map.fst" "$dir/CLG.fst" | fstdeterminizestar --use-log=true \
  | fstminimizeencoded > "$dir/CLG2.fst"

cat "$dir/CLG2.fst" | fstisstochastic || echo "warning: CLG2 is not stochastic."

# --- H and HCLGa (no self-loops yet) ---
make-h-transducer --disambig-syms-out="$dir/disambig_tid.list" \
  --transition-scale=$tscale "$dir/ilabels.remapped" "$tree" "$model" > "$dir/Ha.fst"

fsttablecompose "$dir/Ha.fst" "$dir/CLG2.fst" | fstdeterminizestar --use-log=true \
  | fstrmsymbols "$dir/disambig_tid.list" | fstrmepslocal | fstminimizeencoded > "$dir/HCLGa.fst"

fstisstochastic "$dir/HCLGa.fst" || echo "HCLGa is not stochastic"

# --- HCLG: final graph with self-loops ---
add-self-loops --self-loop-scale=$loopscale --reorder=$reorder "$model" < "$dir/HCLGa.fst" > "$dir/HCLG.fst"

if [ $tscale == 1.0 -a $loopscale == 1.0 ]; then
  # No point doing this test if transition-scale not 1, as it is bound to fail.
  fstisstochastic "$dir/HCLG.fst" || echo "Final HCLG is not stochastic."
fi

# NOTE(review): this unconditional check repeats the guarded one above; with
# the scales set in this script it is the only one of the two that runs.
fstisstochastic "$dir/HCLG.fst" || echo "Final HCLG is not stochastic."

#The next five lines are debug.
# The last two lines of this block print out some alignment info.
fstrandgen --select=log_prob "$dir/HCLG.fst" | fstprint --osymbols=data/words.txt > "$dir/rand.txt"
cat "$dir/rand.txt" | awk 'BEGIN{printf("0 ");} {if(NF>=3 && $3 != 0){ printf ("%d ",$3); }} END {print ""; }' > "$dir/rand_align.txt"
show-alignments data/phones.txt "$model" "ark:$dir/rand_align.txt"
cat "$dir/rand.txt" | awk ' {if(NF>=4 && $4 != "<eps>"){ printf ("%s ",$4); }} END {print ""; }'
......@@ -14,7 +14,13 @@
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# converts an utt2spk file to a spk2utt file.
# Takes input from the stdin or from a file argument;
# output goes to the standard out.
if ( @ARGV > 1 ) {
die "Usage: utt2spk_to_spk2utt.pl [ utt2spk ] > spk2utt";
}
while(<>){
@A = split(" ", $_);
......
#!/bin/bash

# Converts a text-form word-alignment archive into CTM-style lines on stdout.
#
# Arguments:
#   $1  word-alignments file ("-" makes cat read standard input)
#   $2  words symbol table, passed to scripts/int2sym.pl
#
# Each input line is "<utt> <word> <dur> ; <word> <dur> ; ...".  For every
# entry whose word id is not 0 the script prints
#   <utt> 1 <word-id> <start> <dur> <word-id>
# where <dur> is the input duration times 0.01 and <start> is the running sum
# of the scaled durations (entries with id 0 still advance the time).  Field 6
# is then mapped through the symbol table by int2sym.pl.
# NOTE(review): the 0.01 factor presumably converts 10ms frames to seconds --
# confirm against the feature extraction settings.

if [ $# != 2 ]; then
  echo "Usage: wali_to_ctm.sh word-alignments words-symbol-table > ctm" 1>&2
  exit 1;
fi

wali=$1
symtab=$2

# Quoted so that paths containing spaces or glob characters are passed intact.
cat "$wali" | \
  perl -ane '@A = split(" "); $utt = shift @A; @A = split(";", join(" ", @A));
    $time=0.0;
    foreach $a (@A) {
      ($word,$dur) = split(" ", $a);
      $dur *= 0.01;
      if ($word != 0) {
        print "$utt 1 $word $time $dur $word\n";
      }
      $time =$time + $dur;
    } ' | scripts/int2sym.pl --field 6 "$symtab"
......@@ -43,3 +43,16 @@ wait
grep WER $dir/wer_* | \
awk '{n=n+$4; d=d+$6} END{ printf("Average WER is %f (%d / %d) \n", 100.0*n/d, n, d); }' \
> $dir/wer
# Example to show how to get the word alignments:
test=mar87
wbegin=`grep "#1" data/phones_disambig.txt | awk '{print $2}'`
wend=`grep "#2" data/phones_disambig.txt | awk '{print $2}'`
ali-to-phones $model ark:$dir/test_${test}.ali ark:- | \
phones-to-prons data/L_align.fst $wbegin $wend ark:- ark:$dir/test_${test}.tra ark,t:- | \
prons-to-wordali ark:- \
"ark:ali-to-phones --write-lengths $model ark:$dir/test_${test}.ali ark:-|" ark,t:$dir/test_${test}.wali
scripts/wali_to_ctm.sh $dir/test_${test}.wali data/words.txt > $dir/test_${test}.ctm
......@@ -48,7 +48,7 @@ for test in mar87 oct87 feb89 oct89 feb91 sep92; do
scripts/sym2int.pl --ignore-first-field data/words.txt data_prep/test_${test}_trans.txt | \
compute-wer --mode=present ark:- ark,p:$dir/test_${test}.acwt${inv_acwt}.tra \
>& $dir/wer_${inv_acwt}
>& $dir/wer_${test}_${inv_acwt}
done
) &
......
#!/bin/bash
# Copyright 2010-2011 Microsoft Corporation
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# To view the lattices, a suitable command (after running this) is:
# gunzip -c exp/decode_tri1_latgen/test_feb89.lat.gz | scripts/int2sym.pl --field 3 data/words.txt | less
if [ -f path.sh ]; then . path.sh; fi
dir=exp/decode_tri1_latgen_UN
tree=exp/tri1/tree
model=exp/tri1/final.mdl
graphdir=exp/graph_tri1_UN
mkdir -p $dir
scripts/mkgraph_UN.sh $tree $model $graphdir
for test in mar87 oct87 feb89 oct89 feb91 sep92; do
(
feats="ark:add-deltas --print-args=false scp:data/test_${test}.scp ark:- |"
gmm-latgen-simple --beam=20.0 --acoustic-scale=0.08333 --word-symbol-table=data/words.txt $model $graphdir/HCLG.fst "$feats" "ark,t:|gzip -c > $dir/test_${test}.lat.gz" ark,t:$dir/test_${test}.tra ark,t:$dir/test_${test}.ali 2> $dir/decode_${test}.log
# the ,p option lets it score partial output without dying..
scripts/sym2int.pl --ignore-first-field data/words.txt data_prep/test_${test}_trans.txt | \
compute-wer --mode=present ark:- ark,p:$dir/test_${test}.tra >& $dir/wer_${test}
# Now rescore lattices with various acoustic scales, and compute the WER.
for inv_acwt in 6 7 8 9 10 11 12 13; do
acwt=`perl -e "print (1.0/$inv_acwt);"`
lattice-best-path --acoustic-scale=$acwt --word-symbol-table=data/words.txt \
"ark:gunzip -c $dir/test_${test}.lat.gz|" ark:$dir/test_${test}.acwt${inv_acwt}.tra \
2>$dir/rescore_${inv_acwt}.log
scripts/sym2int.pl --ignore-first-field data/words.txt data_prep/test_${test}_trans.txt | \
compute-wer --mode=present ark:- ark,p:$dir/test_${test}.acwt${inv_acwt}.tra \
>& $dir/wer_${inv_acwt}
done
) &
done
wait
for inv_acwt in "" _6 _7 _8 _9 _10 _11 _12 _13; do
grep WER $dir/wer_{mar87,oct87,feb89,oct89,feb91,sep92}${inv_acwt} | \
awk '{n=n+$4; d=d+$6} END{ printf("Average WER is %f (%d / %d) \n", 100.0*n/d, n, d); }' \
> $dir/wer${inv_acwt}
done
### The following commands test some properties of our lattice generation: mainly
# that if you generate at a larger beam and prune to a smaller beam, it's the
# same as if you had originally generated at the smaller beam and pruned to
# the smaller beam. Actually I'm not 100% sure we can prove this, but it seems
# to be the case.
#
test=mar87
n=20
for latbeam in 7 10; do
feats="ark:head -$n data/test_${test}.scp | add-deltas --print-args=false scp:- ark:- |"
gmm-latgen-simple --lattice-beam=$latbeam --beam=20.0 --acoustic-scale=0.08333 --word-symbol-table=data/words.txt $model $graphdir/HCLG.fst "$feats" "ark,t:|gzip -c > $dir/tmp.${latbeam}.lat.gz" 2> $dir/test_lat.$latbeam.log
lattice-prune --acoustic-scale=0.08333 --beam=7 "ark:gunzip -c $dir/tmp.${latbeam}.lat.gz|" "ark,t:|gzip -c > $dir/tmp.pr.${latbeam}.lat.gz" 2>$dir/test_prune.$latbeam.log
done
# We like this to be equivalent. I'm not sure if it can be proved that they must be equivalent,
# though.
lattice-equivalent "ark:gunzip -c $dir/tmp.pr.7.lat.gz|" "ark:gunzip -c $dir/tmp.pr.10.lat.gz|" \
|| exit 1;
# Also testing that lattice pruning can be done twice, and the second time has no effect.
lattice-prune --acoustic-scale=0.08333 --beam=7 "ark:gunzip -c $dir/tmp.10.lat.gz|" ark:- | \
lattice-prune --acoustic-scale=0.08333 --beam=7 ark:- ark,t:- | \
gzip -c > $dir/tmp.pr2.10.lat.gz
lattice-equivalent "ark:gunzip -c $dir/tmp.pr.10.lat.gz|" "ark:gunzip -c $dir/tmp.pr2.10.lat.gz|" \
|| exit 1;
# The following command checks that the lattice-lmrescore program
# runs OK and doesn't change the lattices if you apply it twice with opposite
# weights.
lattice-lmrescore --lm-scale=1.0 "ark:gunzip -c $dir/test_feb89.lat.gz|" data/G.fst ark:- | lattice-lmrescore --lm-scale=-1.0 ark:- data/G.fst ark:- | lattice-equivalent ark:- "ark:gunzip -c $dir/test_feb89.lat.gz|" || exit 1;
#!/bin/bash
# Copyright 2010-2011 Microsoft Corporation
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.

# For each RM test set and each pruning beam, prunes the lattices found in the
# input directory and scores them with scripts/latoracle.sh; then prints (and
# stores in $dir/wer_<beam>) the WER averaged over the test sets per beam.
#
# Optional single argument: directory holding test_<set>.lat.gz
# (default exp/decode_tri1_latgen).  Any other argument count keeps the default.

[ -f path.sh ] && . path.sh

prune_beams="0.01 0.5 1 5 10"
test_sets="mar87 oct87 feb89 oct89 feb91 sep92"

inputdir=exp/decode_tri1_latgen # default value
case $# in
  1) inputdir=$1 ;;
esac

dir=exp/decode_tri1_latoracle
mkdir -p $dir

# Lattices are pruned at acoustic scale 1/inv_acwt.
inv_acwt=10
acwt=$(perl -e "print (1.0/$inv_acwt);")

for test in $test_sets; do
  inputlat="ark:gunzip -c $inputdir/test_${test}.lat.gz|"
  # try pruning beams
  for beam in $prune_beams; do
    echo "Pruning lattices $inputlat with invacwt=$inv_acwt and beam=$beam"
    # lats.pruned.gz is a scratch file, rewritten for every (test, beam) pair.
    lattice-prune --acoustic-scale=$acwt --beam=$beam \
      "$inputlat" "ark,t:|gzip -c>$dir/lats.pruned.gz" \
      2>$dir/prune.$beam.log
    scripts/latoracle.sh "ark:gunzip -c $dir/lats.pruned.gz|" \
      data_prep/test_${test}_trans.txt $dir ${test}_${beam}
  done
done

# Per-beam average over all test sets.
for beam in $prune_beams; do
  printf '%s' "Beam $beam "
  wer_files=
  for test in $test_sets; do
    wer_files="$wer_files $dir/wer_${test}_${beam}"
  done
  grep WER $wer_files | \
    awk '{n=n+$4; d=d+$6} END{ printf("Average WER is %f (%d / %d) \n", 100.0*n/d, n, d); }' \
    | tee $dir/wer_${beam}
done
......@@ -21,6 +21,7 @@
# To be run from ..
if [ -f path.sh ]; then . path.sh; fi
mkdir -p data
cp data_prep/G.txt data/
scripts/make_words_symtab.pl < data/G.txt > data/words.txt
cp data_prep/lexicon.txt data/
......@@ -46,6 +47,9 @@ cat data_prep/train_trans.txt | \
# silprob = 0.5: same prob as word.
scripts/make_lexicon_fst.pl data/lexicon.txt 0.5 sil | fstcompile --isymbols=data/phones.txt --osymbols=data/words.txt --keep_isymbols=false --keep_osymbols=false | fstarcsort --sort_type=olabel > data/L.fst
cat data/lexicon.txt | awk '{printf("%s #1 ", $1); for (n=2; n <= NF; n++) { printf("%s ", $n); } print "#2"; }' | \
scripts/make_lexicon_fst.pl - 0.5 sil | fstcompile --isymbols=data/phones_disambig.txt --osymbols=data/words.txt --keep_isymbols=false --keep_osymbols=false | fstarcsort --sort_type=olabel > data/L_align.fst
scripts/make_lexicon_fst.pl data/lexicon_disambig.txt 0.5 sil '#'$ndisambig | fstcompile --isymbols=data/phones_disambig.txt --osymbols=data/words.txt --keep_isymbols=false --keep_osymbols=false | fstarcsort --sort_type=olabel > data/L_disambig.fst
fstcompile --isymbols=data/words.txt --osymbols=data/words.txt --keep_isymbols=false --keep_osymbols=false data/G.txt > data/G.fst
......
......@@ -112,8 +112,8 @@ while [ $iter -lt $numiters ]; do
if echo $realign_iters | grep -w $iter >/dev/null; then
echo "Aligning data"
sgmm-align-compiled $spkvecs_opt $utt2spk_opt $scale_opts "$gselect_opt" \
--retry-beam=40 $dir/$iter.mdl "ark:gunzip -c $dir/graphs.fsts.gz|" "$feats" \
ark:$dir/cur.ali 2> $dir/align.$iter.log || exit 1;
--beam=8 --retry-beam=40 $dir/$iter.mdl "ark:gunzip -c $dir/graphs.fsts.gz|" \
"$feats" ark:$dir/cur.ali 2> $dir/align.$iter.log || exit 1;
fi
if echo $spkvec_iters | grep -w $iter >/dev/null; then
( ali-to-post ark:$dir/cur.ali ark:- | \
......
......@@ -118,8 +118,8 @@ while [ $iter -lt $numiters ]; do
if echo $realign_iters | grep -w $iter >/dev/null; then
echo "Aligning data"
sgmm-align-compiled $spkvecs_opt $utt2spk_opt $scale_opts "$gselect_opt" \
--retry-beam=40 $dir/$iter.mdl "ark:gunzip -c $dir/graphs.fsts.gz|" "$feats" \
ark:$dir/cur.ali 2> $dir/align.$iter.log || exit 1;
--beam=8 --retry-beam=40 $dir/$iter.mdl "ark:gunzip -c $dir/graphs.fsts.gz|" \
"$feats" ark:$dir/cur.ali 2> $dir/align.$iter.log || exit 1;
fi
if echo $spkvec_iters | grep -w $iter >/dev/null; then
( ali-to-post ark:$dir/cur.ali ark:- | \
......
......@@ -114,7 +114,7 @@ while [ $iter -lt $numiters ]; do
if echo $realign_iters | grep -w $iter >/dev/null; then
echo "Aligning data"
sgmm-align-compiled $spkvecs_opt $utt2spk_opt $scale_opts "$gselect_opt" \
--retry-beam=40 $dir/$iter.mdl "ark:gunzip -c $dir/graphs.fsts.gz|" "$feats" \
--beam=8 --retry-beam=40 $dir/$iter.mdl "ark:gunzip -c $dir/graphs.fsts.gz|" "$feats" \
ark:$dir/cur.ali 2> $dir/align.$iter.log || exit 1;
fi
if echo $spkvec_iters | grep -w $iter >/dev/null; then
......
......@@ -115,8 +115,8 @@ while [ $iter -lt $numiters ]; do
if echo $realign_iters | grep -w $iter >/dev/null; then
echo "Aligning data"
sgmm-align-compiled $spkvecs_opt $utt2spk_opt $scale_opts "$gselect_opt" \
--retry-beam=40 $dir/$iter.mdl "ark:gunzip -c $dir/graphs.fsts.gz|" "$feats" \
ark:$dir/cur.ali 2> $dir/align.$iter.log || exit 1;
--beam=8 --retry-beam=40 $dir/$iter.mdl "ark:gunzip -c $dir/graphs.fsts.gz|" \
"$feats" ark:$dir/cur.ali 2> $dir/align.$iter.log || exit 1;