Commit ee2af852 authored by Dan Povey's avatar Dan Povey
Browse files

sandbox/dan2: Script progress RE neural net training (mostly minor) and SGMM...

sandbox/dan2: Script progress RE neural net training (mostly minor) and SGMM RE quinphone (doesn't help)

git-svn-id: https://svn.code.sf.net/p/kaldi/code/sandbox/dan2@2828 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
parent 8ed96ecb
......@@ -86,8 +86,8 @@ exit 0
# Deep neural net -- various types of hybrid system.
%WER 2.02 [ 253 / 12533, 27 ins, 64 del, 162 sub ] exp/nnet4a/decode/wer_4
%WER 9.77 [ 1224 / 12533, 95 ins, 251 del, 878 sub ] exp/nnet4a/decode_ug/wer_9
%WER 1.84 [ 231 / 12533, 23 ins, 55 del, 153 sub ] exp/nnet4b/decode/wer_5
%WER 9.04 [ 1133 / 12533, 110 ins, 153 del, 870 sub ] exp/nnet4b/decode_ug/wer_7
%WER 1.68 [ 211 / 12533, 20 ins, 53 del, 138 sub ] exp/nnet4b/decode/wer_5
%WER 8.96 [ 1123 / 12533, 97 ins, 166 del, 860 sub ] exp/nnet4b/decode_ug/wer_8
%WER 1.71 [ 214 / 12533, 23 ins, 46 del, 145 sub ] exp/nnet4c/decode/wer_4
%WER 9.02 [ 1130 / 12533, 91 ins, 181 del, 858 sub ] exp/nnet4c/decode_ug/wer_8
......
......@@ -43,7 +43,7 @@ if [ $stage -le 2 ]; then
--mix-up 4000 \
--cmd "$decode_cmd" \
--hidden-layer-dim 450 \
data/nnet4b/train_perturbed_fbank data/lang exp/tri3b_ali_perturbed_mfcc exp/nnet4b || exit 1
data/train_perturbed_fbank data/lang exp/tri3b_ali_perturbed_mfcc exp/nnet4b || exit 1
fi
......
......@@ -33,7 +33,7 @@ if [ $stage -le 1 ]; then
fi
if [ $stage -le 2 ]; then
steps/nnet2/train_block.sh \
steps/nnet2/train_block.sh --stage "$train_stage" \
--cleanup false \
--initial-learning-rate 0.01 --final-learning-rate 0.001 \
--num-epochs 10 --num-epochs-extra 5 \
......@@ -67,80 +67,3 @@ fi
exit 0;
mkdir -p conf/nnet5b
all_fbankdirs=""
all_mfccdirs=""
pairs="1.1-1.0 1.05-1.2 1.0-0.8 0.95-1.1 0.9-0.9"
for pair in $pairs; do
vtln_warp=`echo $pair | cut -d- -f1`
time_warp=`echo $pair | cut -d- -f2`
fs=`perl -e "print ($time_warp*10);"`
fbank_conf=conf/nnet5b/fbank_vtln${vtln_warp}_time${time_warp}.conf
( echo "--num-mel-bins=40"; echo "--frame-shift=$fs"; echo "--vtln-warp=$vtln_warp" ) > $fbank_conf
echo "Making filterbank features for $pair"
fbank_data=data/nnet5b/train_si284_fbank_vtln${vtln_warp}_time${time_warp}
all_fbankdirs="$all_fbankdirs $fbank_data"
utils/copy_data_dir.sh --spk-prefix ${pair}- --utt-prefix ${pair}- data/train_si284 $fbank_data
steps/make_fbank.sh --fbank-config $fbank_conf --nj 8 --cmd "run.pl" $fbank_data exp/nnet5b/make_mfcc/mfcc_$pair $featdir
steps/compute_cmvn_stats.sh $fbank_data exp/nnet5b/fbank_$pair $featdir
echo "Making MFCC features for $pair"
mfcc_data=data/nnet5b/train_si284_mfcc_vtln${vtln_warp}_time${time_warp}
mfcc_conf=conf/nnet5b/mfcc_vtln${vtln_warp}_time${time_warp}.conf
( echo "--use-energy=false"; echo "--frame-shift=$fs" ; echo "--vtln-warp=$vtln_warp" ) > $mfcc_conf
utils/copy_data_dir.sh --spk-prefix ${pair}- --utt-prefix ${pair}- data/train_si284 $mfcc_data
steps/make_mfcc.sh --mfcc-config $mfcc_conf --nj 8 --cmd "run.pl" $mfcc_data exp/nnet5b/make_mfcc/mfcc_$pair $featdir
steps/compute_cmvn_stats.sh $mfcc_data exp/nnet5b/mfcc_$pair $featdir
all_mfccdirs="$all_mfccdirs $mfcc_data"
done
utils/combine_data.sh data/nnet5b/train_si284_fbank_all $all_fbankdirs
utils/combine_data.sh data/nnet5b/train_si284_mfcc_all $all_mfccdirs
steps/align_fmllr.sh --nj 8 --cmd "$train_cmd" \
data/nnet5b/train_si284_mfcc_all data/lang exp/tri3b exp/tri3b_ali_nnet5b
# In the combined filterbank directory, create a file utt2uniq which maps
# our extended utterance-ids to "unique utterances". This enables the
# script steps/nnet2/get_egs.sh to hold out data in a more proper way.
cat data/nnet5b/train_si284_fbank_all/utt2spk | awk '{print $1;}' | \
perl -ane ' chop; $utt = $_; s/[-0-9\.]+-[-0-9\.]+-//; print "$utt $_\n"; ' \
> data/nnet5b/train_si284_fbank_all/utt2uniq
fi
# Stage 1: train the nnet5b network with steps/nnet2/train_block.sh on the
# combined filterbank data (data/nnet5b/train_si284_fbank_all), using the
# alignments in exp/tri3b_ali_nnet5b, and write the model to exp/nnet5b.
# --stage "$train_stage" forwards a stage value to the training script.
# NOTE(review): the job command passed here is $decode_cmd, not $train_cmd --
# confirm this is intentional.
if [ $stage -le 1 ]; then
steps/nnet2/train_block.sh --stage "$train_stage" \
--bias-stddev 0.5 --splice-width 7 --egs-opts "--feat-type raw" \
--softmax-learning-rate-factor 0.5 --cleanup false \
--initial-learning-rate 0.04 --final-learning-rate 0.004 \
--num-epochs-extra 10 --add-layers-period 1 \
--mix-up 4000 \
--cmd "$decode_cmd" \
--hidden-layer-dim 450 \
data/nnet5b/train_si284_fbank_all data/lang exp/tri3b_ali_nnet5b exp/nnet5b || exit 1
fi
# Stage 2: create 40-bin filterbank versions of the test sets.
# For each test set, data/<set> is copied to data/<set>_fbank, any split*
# directories inherited from the source directory are removed, and filterbank
# features plus CMVN statistics are computed into $featdir.
if [ $stage -le 2 ]; then
  # Create the testing data.
  featdir=`pwd`/mfcc
  mkdir -p $featdir
  fbank_conf=conf/fbank_40.conf
  echo "--num-mel-bins=40" > $fbank_conf
  for x in test_eval92 test_eval93 test_dev93; do
    cp -rT data/$x data/${x}_fbank
    # The copy lives under data/, so the stale split dirs must be removed
    # there (the path previously lacked the data/ prefix and matched nothing).
    # -f keeps this quiet when the source directory had no split* dirs.
    rm -rf data/${x}_fbank/split* || true
    steps/make_fbank.sh --fbank-config "$fbank_conf" --nj 8 \
      --cmd "run.pl" data/${x}_fbank exp/make_fbank/$x $featdir || exit 1;
    steps/compute_cmvn_stats.sh data/${x}_fbank exp/make_fbank/$x $featdir || exit 1;
  done
fi
# Stage 3: decode with the nnet5b model using the tri3b graphs; the second
# call uses exp/tri3b/graph_ug and writes to exp/nnet5b/decode_ug.
# NOTE(review): data/test_fbank is not one of the directories created by the
# test-data stage of this script (test_eval92_fbank, test_eval93_fbank,
# test_dev93_fbank) -- confirm which data directory is intended here.
if [ $stage -le 3 ]; then
steps/nnet2/decode.sh --config conf/decode.config --cmd "$decode_cmd" --nj 20 --feat-type raw \
exp/tri3b/graph data/test_fbank exp/nnet5b/decode
steps/nnet2/decode.sh --config conf/decode.config --cmd "$decode_cmd" --nj 20 --feat-type raw \
exp/tri3b/graph_ug data/test_fbank exp/nnet5b/decode_ug
fi
......@@ -79,6 +79,20 @@
exp/sgmm2_5b/graph_bd_tgpr data/test_eval92 exp/sgmm2_5b/decode_bd_tgpr_eval92
) &
# Quinphone variant of the SGMM2 system (context width 5, central position 2),
# trained into exp/sgmm2_5c from the exp/tri4b_ali_si284 alignments and the
# exp/ubm5b UBM.  The whole group runs as a background subshell, so the
# "exit 1" after training only terminates this subshell, not the calling script.
(
steps/train_sgmm2.sh --cmd "$train_cmd" \
--context-opts "--context-width=5 --central-position=2" \
11000 25000 data/train_si284 data/lang exp/tri4b_ali_si284 \
exp/ubm5b/final.ubm exp/sgmm2_5c || exit 1;
# Decode from lattices in exp/sgmm2_5b
# (no decoding graph is passed for sgmm2_5c; the existing sgmm2_5b decode
# directories are given as the old-decode-dir argument instead).
steps/decode_sgmm2_fromlats.sh --cmd "$decode_cmd" --transform-dir exp/tri4b/decode_tgpr_dev93 \
data/test_dev93 data/lang_test_tgpr exp/sgmm2_5b/decode_tgpr_dev93 exp/sgmm2_5c/decode_tgpr_dev93
steps/decode_sgmm2_fromlats.sh --cmd "$decode_cmd" --transform-dir exp/tri4b/decode_tgpr_eval92 \
data/test_eval92 data/lang_test_tgpr exp/sgmm2_5b/decode_tgpr_eval92 exp/sgmm2_5c/decode_tgpr_eval92
) &
steps/align_sgmm2.sh --nj 30 --cmd "$train_cmd" --transform-dir exp/tri4b_ali_si284 \
--use-graphs true --use-gselect true data/train_si284 data/lang exp/sgmm2_5b exp/sgmm2_5b_ali_si284
......
#!/bin/bash
# Copyright 2012-2013 Johns Hopkins University (Author: Daniel Povey). Apache 2.0.
# This script does decoding with an SGMM2 system, with speaker vectors. If the
# SGMM2 system was built on top of fMLLR transforms from a conventional system,
# you should provide the --transform-dir option.
# This script does not use a decoding graph, but instead you provide
# a previous decoding directory with lattices in it. This script will only
# make use of the word sequences in the lattices; it limits the decoding
# to those sequences. You should also provide a "lang" directory from
# which this script will use the G.fst and L.fst.
# Begin configuration section.

# --- Job control ---
stage=1               # Steps guarded by a stage number below this value are skipped.
cmd="run.pl"          # Wrapper used to launch the per-job commands.
batch_size=75         # Limits memory blowup in compile-train-graphs-fsts

# --- Models and transforms ---
alignment_model=""    # Model for the first-pass decoding; worked out from the
                      # model directory further down if left empty.
transform_dir=""      # dir to find fMLLR transforms.
use_fmllr="false"     # Set to true to also estimate fMLLR transforms with the SGMM2 model.
fmllr_iters=10
fmllr_min_count=1000

# --- Gaussian selection ---
gselect=15            # Number of Gaussian-selection indices for SGMMs.
first_pass_gselect=3  # Smaller number of Gaussian-selection indices, used in
                      # the 1st pass of decoding (lattice generation).

# --- Search and pruning ---
acwt=0.08333          # Just a default value, used for adaptation and beam-pruning.
beam="20.0"           # Decoding beam.
max_active=7000
lat_beam="8.0"        # Beam we use in lattice generation.
vecs_beam="4.0"       # Beam we use to prune lattices while getting posteriors for
                      # speaker-vector computation.  Can be quite tight (actually
                      # we could probably just do best-path).
scale_opts="--transition-scale=1.0 --self-loop-scale=0.1"

# End configuration section.
echo "$0 $@" # Print the command line for logging

# Pick up the tool paths if a path.sh is present, then let parse_options.sh
# overwrite the configuration variables above from any --name value options.
[ -f ./path.sh ] && . ./path.sh; # source the path.
. parse_options.sh || exit 1;

# Exactly four positional arguments are required; otherwise print the usage
# message and stop.  The usage line uses $0 so it always names this script,
# and the quoted beam default matches the value set in the configuration
# section (20.0).
if [ $# -ne 4 ]; then
  echo "Usage: $0 [options] <data-dir> <lang-dir> <old-decode-dir> <decode-dir>"
  echo ""
  echo "main options (for others, see top of script file)"
  echo " --transform-dir <decoding-dir> # directory of previous decoding"
  echo " # where we can find transforms for SAT systems."
  echo " --alignment-model <ali-mdl> # Model for the first-pass decoding."
  echo " --config <config-file> # config containing options"
  echo " --cmd <cmd> # Command to run in parallel with"
  echo " --beam <beam> # Decoding beam; default 20.0"
  exit 1;
fi
# Positional arguments: data dir, lang dir, previous decode dir (source of the
# lattices), and the output decode dir.  The model is read from the parent
# directory of the output decode dir.
data=$1
lang=$2
olddir=$3
dir=$4
srcdir=$(dirname $dir)

# Every input listed here must exist before any work starts.
for f in $data/feats.scp $lang/G.fst $lang/L_disambig.fst \
         $lang/phones/disambig.int $srcdir/final.mdl $srcdir/tree \
         $olddir/lat.1.gz; do
  if [ ! -f $f ]; then
    echo "$0: no such file $f"
    exit 1
  fi
done

# The job count is inherited from the previous decode directory.
nj=$(cat $olddir/num_jobs) || exit 1
sdata=$data/split$nj
silphonelist=$(cat $lang/phones/silence.csl) || exit 1
# splice_opts is optional: a missing file simply leaves the variable empty.
splice_opts=$(cat $srcdir/splice_opts 2>/dev/null)

# Gaussian-selection options; the first-pass variant pipes the stored
# selection through copy-gselect to keep only $first_pass_gselect indices.
gselect_opt="--gselect=ark,s,cs:gunzip -c $dir/gselect.JOB.gz|"
gselect_opt_1stpass="$gselect_opt copy-gselect --n=$first_pass_gselect ark:- ark:- |"

mkdir -p $dir/log
# Split the data unless a split directory newer than feats.scp already exists.
if ! [[ -d $sdata && $data/feats.scp -ot $sdata ]]; then
  split_data.sh $data $nj || exit 1
fi
echo $nj > $dir/num_jobs
## Set up features
# The feature type is "lda" when the model directory has a final.mat,
# otherwise "delta".
feat_type=delta
[ -f $srcdir/final.mat ] && feat_type=lda
echo "$0: feature type is $feat_type"

# With no --transform-dir given, fall back to the old decode directory if it
# holds transforms.
if [ -z "$transform_dir" ] && [ -f $olddir/trans.1 ]; then
  transform_dir=$olddir
fi

# Both feature types start with the same per-speaker CMVN stage.
cmvn_pipe="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- |"
case $feat_type in
  delta)
    feats="$cmvn_pipe add-deltas ark:- ark:- |"
    ;;
  lda)
    feats="$cmvn_pipe splice-feats $splice_opts ark:- ark:- | transform-feats $srcdir/final.mat ark:- ark:- |"
    ;;
  *)
    echo "$0: invalid feature type $feat_type" && exit 1;
esac

if [ -n "$transform_dir" ]; then
  echo "$0: using transforms from $transform_dir"
  if [ ! -f $transform_dir/trans.1 ]; then
    echo "$0: no such file $transform_dir/trans.1"
    exit 1
  fi
  # trans.JOB files are indexed by job, so the job counts have to agree.
  if [ "$nj" -ne "$(cat $transform_dir/num_jobs)" ]; then
    echo "$0: #jobs mismatch with transform-dir."
    exit 1
  fi
  feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark,s,cs:$transform_dir/trans.JOB ark:- ark:- |"
elif grep 'transform-feats --utt2spk' $srcdir/log/acc.0.1.log 2>/dev/null; then
  # The training log mentions speaker transforms but none are supplied now.
  echo "$0: **WARNING**: you seem to be using an SGMM system trained with transforms,"
  echo " but you are not providing the --transform-dir option in test time."
fi
## Calculate FMLLR pre-transforms if needed. We are doing this here since this
## step is required by models both with and without speaker vectors.
# final.fmllr_mdl is (re)built when it is missing or older than final.mdl.
if $use_fmllr && { [ ! -f $srcdir/final.fmllr_mdl ] || [ $srcdir/final.fmllr_mdl -ot $srcdir/final.mdl ]; }; then
  echo "$0: computing pre-transform for fMLLR computation."
  sgmm2-comp-prexform $srcdir/final.mdl $srcdir/final.occs $srcdir/final.fmllr_mdl || exit 1
fi

## Save Gaussian-selection info to disk.
# Note: we can use final.mdl regardless of whether there is an alignment model--
# they use the same UBM.
if [ $stage -le 1 ]; then
  $cmd JOB=1:$nj $dir/log/gselect.JOB.log \
    sgmm2-gselect --full-gmm-nbest=$gselect $srcdir/final.mdl \
      "$feats" "ark:|gzip -c >$dir/gselect.JOB.gz" || exit 1
fi
## Work out name of alignment model. ##
# Unless one was given on the command line, prefer final.alimdl from the model
# directory and fall back to final.mdl.
if [ -z "$alignment_model" ]; then
  alignment_model=$srcdir/final.mdl
  if [ -f "$srcdir/final.alimdl" ]; then
    alignment_model=$srcdir/final.alimdl
  fi
fi
if [ ! -f "$alignment_model" ]; then
  echo "$0: no alignment model $alignment_model "
  exit 1
fi
# Generate state-level lattice which we can rescore. This is done with the
# alignment model and no speaker-vectors.
# No full decoding graph is used.  Each job reads the old lattices from
# $olddir/lat.JOB.gz through lattice-to-fst, composes the result with the
# output-projected, arc-sorted G.fst from $lang, runs fstdeterminizestar, and
# hands that to compile-train-graphs-fsts together with L_disambig.fst to get
# the graphs that sgmm2-latgen-faster decodes with.  Gaussian selection uses
# the tighter first-pass option ($gselect_opt_1stpass), lattice determinization
# is switched off (--determinize-lattice=false), and the result is written to
# $dir/pre_lat.JOB.gz for the later stages.
if [ $stage -le 2 ]; then
$cmd JOB=1:$nj $dir/log/decode_pass1.JOB.log \
lattice-to-fst "ark:gunzip -c $olddir/lat.JOB.gz|" ark:- \| \
fsttablecompose "fstproject --project_output=true $lang/G.fst | fstarcsort |" ark:- ark:- \| \
fstdeterminizestar ark:- ark:- \| \
compile-train-graphs-fsts --read-disambig-syms=$lang/phones/disambig.int \
--batch-size=$batch_size $scale_opts \
$srcdir/tree $srcdir/final.mdl $lang/L_disambig.fst ark:- ark:- \| \
sgmm2-latgen-faster --max-active=$max_active --beam=$beam --lattice-beam=$lat_beam \
--acoustic-scale=$acwt --determinize-lattice=false --allow-partial=true \
--word-symbol-table=$lang/words.txt "$gselect_opt_1stpass" $alignment_model \
"ark:-" "$feats" "ark:|gzip -c > $dir/pre_lat.JOB.gz" || exit 1;
fi
## Check if the model has speaker vectors
# spkdim is the last field of the 'speaker vector' line printed by sgmm2-info.
# NOTE(review): if that line is missing, spkdim is empty, the numeric test
# below reports an error and control falls into the "without speaker vectors"
# branch -- confirm that is acceptable.
spkdim=`sgmm2-info $srcdir/final.mdl | grep 'speaker vector' | awk '{print $NF}'`
if [ $spkdim -gt 0 ]; then ### For models with speaker vectors:
# Estimate speaker vectors (1st pass). Prune before determinizing
# because determinization can take a while on un-pruned lattices.
# Note: the sgmm2-post-to-gpost stage is necessary because we have
# a separate alignment-model and final model, otherwise we'd skip it
# and use sgmm2-est-spkvecs.
if [ $stage -le 3 ]; then
$cmd JOB=1:$nj $dir/log/vecs_pass1.JOB.log \
gunzip -c $dir/pre_lat.JOB.gz \| \
lattice-prune --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \
lattice-determinize-pruned --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \
lattice-to-post --acoustic-scale=$acwt ark:- ark:- \| \
weight-silence-post 0.0 $silphonelist $alignment_model ark:- ark:- \| \
sgmm2-post-to-gpost "$gselect_opt" $alignment_model "$feats" ark:- ark:- \| \
sgmm2-est-spkvecs-gpost --spk2utt=ark:$sdata/JOB/spk2utt \
$srcdir/final.mdl "$feats" ark,s,cs:- "ark:$dir/pre_vecs.JOB" || exit 1;
fi
# Estimate speaker vectors (2nd pass). Since we already have spk vectors,
# at this point we need to rescore the lattice to get the correct posteriors.
if [ $stage -le 4 ]; then
$cmd JOB=1:$nj $dir/log/vecs_pass2.JOB.log \
gunzip -c $dir/pre_lat.JOB.gz \| \
sgmm2-rescore-lattice --spk-vecs=ark:$dir/pre_vecs.JOB --utt2spk=ark:$sdata/JOB/utt2spk \
"$gselect_opt" $srcdir/final.mdl ark:- "$feats" ark:- \| \
lattice-prune --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \
lattice-determinize-pruned --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \
lattice-to-post --acoustic-scale=$acwt ark:- ark:- \| \
weight-silence-post 0.0 $silphonelist $srcdir/final.mdl ark:- ark:- \| \
sgmm2-est-spkvecs --spk2utt=ark:$sdata/JOB/spk2utt "$gselect_opt" --spk-vecs=ark:$dir/pre_vecs.JOB \
$srcdir/final.mdl "$feats" ark,s,cs:- "ark:$dir/vecs.JOB" || exit 1;
fi
# The first-pass vectors are only an intermediate result; this removal runs
# regardless of $stage.
rm $dir/pre_vecs.*
if $use_fmllr; then
# Estimate fMLLR transforms (note: these may be on top of any
# fMLLR transforms estimated with the baseline GMM system).
if [ $stage -le 5 ]; then # compute fMLLR transforms.
echo "$0: computing fMLLR transforms."
$cmd JOB=1:$nj $dir/log/fmllr.JOB.log \
gunzip -c $dir/pre_lat.JOB.gz \| \
sgmm2-rescore-lattice --spk-vecs=ark:$dir/vecs.JOB --utt2spk=ark:$sdata/JOB/utt2spk \
"$gselect_opt" $srcdir/final.mdl ark:- "$feats" ark:- \| \
lattice-prune --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \
lattice-determinize-pruned --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \
lattice-to-post --acoustic-scale=$acwt ark:- ark:- \| \
weight-silence-post 0.0 $silphonelist $srcdir/final.mdl ark:- ark:- \| \
sgmm2-est-fmllr --spk2utt=ark:$sdata/JOB/spk2utt "$gselect_opt" --spk-vecs=ark:$dir/vecs.JOB \
--fmllr-iters=$fmllr_iters --fmllr-min-count=$fmllr_min_count \
$srcdir/final.fmllr_mdl "$feats" ark,s,cs:- "ark:$dir/trans.JOB" || exit 1;
fi
# From here on the feature pipeline also applies the transforms in
# $dir/trans.JOB (appended even when the estimation stage above was skipped).
feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark,s,cs:$dir/trans.JOB ark:- ark:- |"
fi
# Now rescore the state-level lattices with the adapted features and the
# corresponding model. Prune and determinize the lattices to limit
# their size.
if [ $stage -le 6 ]; then
$cmd JOB=1:$nj $dir/log/rescore.JOB.log \
sgmm2-rescore-lattice "$gselect_opt" --utt2spk=ark:$sdata/JOB/utt2spk --spk-vecs=ark:$dir/vecs.JOB \
$srcdir/final.mdl "ark:gunzip -c $dir/pre_lat.JOB.gz|" "$feats" ark:- \| \
lattice-determinize-pruned --acoustic-scale=$acwt --beam=$lat_beam ark:- \
"ark:|gzip -c > $dir/lat.JOB.gz" || exit 1;
fi
# The first-pass lattices are removed here regardless of $stage.
rm $dir/pre_lat.*.gz
else ### For models without speaker vectors:
if $use_fmllr; then
# Estimate fMLLR transforms (note: these may be on top of any
# fMLLR transforms estimated with the baseline GMM system).
if [ $stage -le 5 ]; then # compute fMLLR transforms.
echo "$0: computing fMLLR transforms."
$cmd JOB=1:$nj $dir/log/fmllr.JOB.log \
gunzip -c $dir/pre_lat.JOB.gz \| \
sgmm2-rescore-lattice --utt2spk=ark:$sdata/JOB/utt2spk \
"$gselect_opt" $srcdir/final.mdl ark:- "$feats" ark:- \| \
lattice-prune --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \
lattice-determinize-pruned --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \
lattice-to-post --acoustic-scale=$acwt ark:- ark:- \| \
weight-silence-post 0.0 $silphonelist $srcdir/final.mdl ark:- ark:- \| \
sgmm2-est-fmllr --spk2utt=ark:$sdata/JOB/spk2utt "$gselect_opt" \
--fmllr-iters=$fmllr_iters --fmllr-min-count=$fmllr_min_count \
$srcdir/final.fmllr_mdl "$feats" ark,s,cs:- "ark:$dir/trans.JOB" || exit 1;
fi
feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark,s,cs:$dir/trans.JOB ark:- ark:- |"
fi
# Now rescore the state-level lattices with the adapted features and the
# corresponding model. Prune and determinize the lattices to limit
# their size.
# NOTE(review): the else-branch below also runs when use_fmllr is true but
# $stage is greater than 6; in that case it tries to rename pre_lat files --
# confirm this is the intended behaviour when resuming at a late stage.
if [ $stage -le 6 ] && $use_fmllr; then
$cmd JOB=1:$nj $dir/log/rescore.JOB.log \
sgmm2-rescore-lattice "$gselect_opt" --utt2spk=ark:$sdata/JOB/utt2spk \
$srcdir/final.mdl "ark:gunzip -c $dir/pre_lat.JOB.gz|" "$feats" ark:- \| \
lattice-determinize-pruned --acoustic-scale=$acwt --beam=$lat_beam ark:- \
"ark:|gzip -c > $dir/lat.JOB.gz" || exit 1;
rm $dir/pre_lat.*.gz
else # Already done with decoding if no adaptation needed.
# The first-pass lattices simply become the final lattices.
for n in `seq 1 $nj`; do
mv $dir/pre_lat.${n}.gz $dir/lat.${n}.gz
done
fi
fi
# The output of this script is the files "lat.*.gz"-- we'll rescore this at
# different acoustic scales to get the final output.
if [ $stage -le 7 ]; then
  # Scoring is delegated to the recipe-local script; without it the run is
  # reported as a failure.
  if [ ! -x local/score.sh ]; then
    echo "Not scoring because local/score.sh does not exist or not executable."
    exit 1
  fi

  echo "score best paths"
  local/score.sh --cmd "$cmd" $data $lang $dir

  # Only the message is printed; the sclite-based scoring call is disabled.
  echo "score confidence and timing with sclite"
  #local/score_sclite_conf.sh --cmd "$cmd" --language turkish $data $lang $dir
fi

echo "Decoding done."
exit 0
......@@ -77,7 +77,8 @@ utils/combine_data.sh $data $all_feature_dirs
# our extended utterance-ids to "unique utterances". This enables the
# script steps/nnet2/get_egs.sh to hold out data in a more proper way.
cat $data/utt2spk | \
perl -e ' while(<STDIN>){ $x=$_; chop $x; foreach $pair (@ARGS) { s/^$pair-// && last; } print "$x $_"; } ' \
perl -e ' while(<STDIN>){ @A=split; $x=shift @A; $y=$x;
foreach $pair (@ARGV) { $y =~ s/^${pair}-// && last; } print "$x $y\n"; } ' $pairs \
> $data/utt2uniq
if $cleanup; then
......
......@@ -123,7 +123,7 @@ fi
if [ $stage -le -6 ]; then
echo "$0: accumulating tree stats"
$cmd JOB=1:$nj $dir/log/acc_tree.JOB.log \
acc-tree-stats --ci-phones=$ciphonelist $alidir/final.mdl "$feats" \
acc-tree-stats $context_opts --ci-phones=$ciphonelist $alidir/final.mdl "$feats" \
"ark:gunzip -c $alidir/ali.JOB.gz|" $dir/JOB.treeacc || exit 1;
[ "`ls $dir/*.treeacc | wc -w`" -ne "$nj" ] && echo "$0: Wrong #tree-stats" && exit 1;
sum-tree-stats $dir/treeacc $dir/*.treeacc 2>$dir/log/sum_tree_acc.log || exit 1;
......@@ -133,13 +133,13 @@ fi
if [ $stage -le -5 ]; then
echo "$0: Getting questions for tree clustering."
# preparing questions, roots file...
cluster-phones $dir/treeacc $lang/phones/sets.int $dir/questions.int 2> $dir/log/questions.log || exit 1;
cluster-phones $context_opts $dir/treeacc $lang/phones/sets.int $dir/questions.int 2> $dir/log/questions.log || exit 1;
cat $lang/phones/extra_questions.int >> $dir/questions.int
compile-questions $lang/topo $dir/questions.int $dir/questions.qst 2>$dir/log/compile_questions.log || exit 1;
compile-questions $context_opts $lang/topo $dir/questions.int $dir/questions.qst 2>$dir/log/compile_questions.log || exit 1;
echo "$0: Building the tree"
$cmd $dir/log/build_tree.log \
build-tree-two-level --binary=false --verbose=1 --max-leaves-first=$num_groups \
build-tree-two-level $context_opts --binary=false --verbose=1 --max-leaves-first=$num_groups \
--max-leaves-second=$num_pdfs $dir/treeacc $lang/phones/roots.int \
$dir/questions.qst $lang/topo $dir/tree $dir/pdf2group.map || exit 1;
fi
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment