Commit 2e374f65 authored by Dan Povey

trunk: committing changes to support 'perturbed' training (see train-nnet-perturbed.h).  This seems to only be substantially helpful for small datasets like RM, and I may remove it in future.  Also adding a script that could be helpful for data cleanup, find_bad_utts.sh; this detects utterances where the transcript might not match the audio.  Plus misc small fixes.
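
For reference, the cleanup workflow that find_bad_utts.sh enables looks roughly like this (a minimal sketch assembled from the demo commands later in this diff; the directory names are from the RM recipe and are illustrative):

steps/cleanup/find_bad_utts.sh --nj 20 --cmd "$train_cmd" data/train data/lang \
  exp/tri3b_ali exp/tri3b_cleanup
# show the utterances whose decoded output differs most from their transcripts:
head exp/tri3b_cleanup/all_info.sorted.txt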

git-svn-id: https://svn.code.sf.net/p/kaldi/code/trunk@4241 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
parent a43c9a77
......@@ -153,9 +153,14 @@ steps/train_sat.sh --cmd "$train_cmd" \
(
utils/mkgraph.sh data/lang_test exp/tri5a exp/tri5a/graph
steps/decode_fmllr.sh --nj 25 --cmd "$decode_cmd" --config conf/decode.config \
exp/tri5a/graph data/dev exp/tri5a/decode_dev
)&
#
# steps/cleanup/find_bad_utts.sh --nj 200 --cmd "$train_cmd" data/train data/lang \
# exp/tri5a exp/tri5a_cleanup
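# head exp/tri5a_cleanup/all_info.sorted.txt  # shows the worst-matching utterances (see the RM example later in this commit)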
# local/run_for_spkid.sh
# we don't have the results for the step below yet.
......
......@@ -111,11 +111,17 @@ exit 0
%WER 1.80 [ 226 / 12533, 23 ins, 52 del, 151 sub ] exp/nnet4c_gpu/decode/wer_5
%WER 8.64 [ 1083 / 12533, 93 ins, 169 del, 821 sub ] exp/nnet4c_gpu/decode_ug/wer_10
%WER 1.61 [ 202 / 12533, 25 ins, 47 del, 130 sub ] exp/nnet4d/decode/wer_5
%WER 8.17 [ 1024 / 12533, 83 ins, 179 del, 762 sub ] exp/nnet4d/decode_ug/wer_11
%WER 1.68 [ 211 / 12533, 29 ins, 39 del, 143 sub ] exp/nnet4d/decode/wer_4
%WER 8.40 [ 1053 / 12533, 101 ins, 153 del, 799 sub ] exp/nnet4d/decode_ug/wer_10
%WER 1.63 [ 204 / 12533, 29 ins, 42 del, 133 sub ] exp/nnet4d_gpu/decode/wer_4
%WER 8.11 [ 1016 / 12533, 80 ins, 168 del, 768 sub ] exp/nnet4d_gpu/decode_ug/wer_10
%WER 1.74 [ 218 / 12533, 25 ins, 48 del, 145 sub ] exp/nnet4d_gpu/decode/wer_6
%WER 8.39 [ 1051 / 12533, 106 ins, 149 del, 796 sub ] exp/nnet4d_gpu/decode_ug/wer_10
%WER 1.53 [ 192 / 12533, 22 ins, 42 del, 128 sub ] exp/nnet4d2/decode/wer_3
%WER 8.06 [ 1010 / 12533, 79 ins, 152 del, 779 sub ] exp/nnet4d2/decode_ug/wer_8
%WER 1.51 [ 189 / 12533, 25 ins, 34 del, 130 sub ] exp/nnet4d2_gpu/decode/wer_3
%WER 7.97 [ 999 / 12533, 78 ins, 152 del, 769 sub ] exp/nnet4d2_gpu/decode_ug/wer_8
%WER 1.37 [ 172 / 12533, 14 ins, 36 del, 122 sub ] exp/nnet4e_gpu/decode/wer_3
%WER 8.03 [ 1006 / 12533, 61 ins, 179 del, 766 sub ] exp/nnet4e_gpu/decode_ug/wer_8
......@@ -143,8 +149,8 @@ exit 0
# Discriminatively trained system (using p-norm rather than tanh nonlinearities, using SMBR, on GPU)
%WER 1.56 [ 195 / 12533, 28 ins, 31 del, 136 sub ] exp/nnet5d_mpe_gpu/decode_epoch2/wer_2
%WER 8.35 [ 1047 / 12533, 77 ins, 171 del, 799 sub ] exp/nnet5d_mpe_gpu/decode_ug_epoch4/wer_10
%WER 1.74 [ 218 / 12533, 25 ins, 48 del, 145 sub ] exp/nnet5d_mpe_gpu/decode_epoch1/wer_6
%WER 8.40 [ 1053 / 12533, 108 ins, 148 del, 797 sub ] exp/nnet5d_mpe_gpu/decode_ug_epoch1/wer_10
# Discriminatively trained system on top of ensemble trained p-norm network (using SMBR, on GPU)
%WER 1.36 [ 170 / 12533, 15 ins, 34 del, 121 sub ] exp/nnet5e_mpe_gpu/decode_epoch2/wer_3
......
#!/bin/bash
# 4d2 is as 4d but adding perturbed training with multiplier=1.0
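# ('Perturbed' training is implemented in train-nnet-perturbed.h; per the
# commit message it mainly seems to help on small datasets like RM.)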
train_stage=-10
use_gpu=true
. cmd.sh
. ./path.sh
. utils/parse_options.sh
if $use_gpu; then
  if ! cuda-compiled; then
    cat <<EOF && exit 1
This script is intended to be used with GPUs but you have not compiled Kaldi with CUDA
If you want to use GPUs (and have them), go to src/, and configure and make on a machine
where "nvcc" is installed.
EOF
  fi
  parallel_opts="-l gpu=1"
  num_threads=1
  minibatch_size=512
  dir=exp/nnet4d2_gpu
else
  # Use 4 nnet jobs just like run_4d_gpu.sh so the results should be
  # almost the same, but this may be a little bit slow.
  num_threads=16
  minibatch_size=128
  parallel_opts="-pe smp $num_threads"
  dir=exp/nnet4d2
fi
if [ ! -f $dir/final.mdl ]; then
  steps/nnet2/train_pnorm_fast.sh --stage $train_stage \
    --target-multiplier 1.0 \
    --num-threads "$num_threads" \
    --minibatch-size "$minibatch_size" \
    --parallel-opts "$parallel_opts" \
    --num-jobs-nnet 4 \
    --num-epochs-extra 10 --add-layers-period 1 \
    --num-hidden-layers 2 \
    --mix-up 4000 \
    --initial-learning-rate 0.02 --final-learning-rate 0.004 \
    --cmd "$decode_cmd" \
    --pnorm-input-dim 1000 \
    --pnorm-output-dim 200 \
    data/train data/lang exp/tri3b_ali $dir || exit 1;
fi

steps/nnet2/decode.sh --config conf/decode.config --cmd "$decode_cmd" --nj 20 \
  --transform-dir exp/tri3b/decode \
  exp/tri3b/graph data/test $dir/decode &

steps/nnet2/decode.sh --config conf/decode.config --cmd "$decode_cmd" --nj 20 \
  --transform-dir exp/tri3b/decode_ug \
  exp/tri3b/graph_ug data/test $dir/decode_ug

wait
......@@ -2,58 +2,108 @@
# This script demonstrates discriminative training of p-norm neural nets.
# It's on top of run_4c_gpu.sh which uses adapted 40-dimensional features.
# It's on top of run_4d_gpu.sh which uses adapted 40-dimensional features.
# This version of the script uses GPUs. We distinguish it by putting "_gpu"
# at the end of the directory name.
gpu_opts="-l gpu=1,hostname=g*" # This is suitable for the CLSP network,
# you'll likely have to change it. We'll
# use it later on, in the training (it's
# not used in denlat creation)
. ./cmd.sh
. ./path.sh
! cuda-compiled && cat <<EOF && exit 1
This script is intended to be used with GPUs but you have not compiled Kaldi with CUDA
If you want to use GPUs (and have them), go to src/, and configure and make on a machine
where "nvcc" is installed.
EOF
use_gpu=true
stage=0
transform_dir=exp/tri3b_ali
# The denominator lattice creation currently doesn't use GPUs.
. cmd.sh
. ./path.sh
. utils/parse_options.sh
# Note: we specify 1G each for the mem_free and ram_free, which is per
# thread... it will likely be less than the default. Increase the beam relative
# to the defaults; this is just for this RM setup, where the default beams will
# likely generate very thin lattices. Note: the transform-dir is important to
# specify, since this system is on top of fMLLR features.
nj=$(cat exp/tri3b_ali/num_jobs)
dir=nnet4d_gpu
steps/nnet2/make_denlats.sh --cmd "$decode_cmd -l mem_free=1G,ram_free=1G" \
--nj $nj --sub-split 20 --num-threads 6 --parallel-opts "-pe smp 6" \
--beam 20.0 --lattice-beam 10.0 \
--transform-dir exp/tri3b_ali \
data/train data/lang exp/$dir exp/$dir_denlats
[ ! -f $transform_dir/num_jobs ] && \
echo "Expected $transform_dir/num_jobs to exist" && exit 1;
nj_orig=$(cat $transform_dir/num_jobs)
steps/nnet2/align.sh --cmd "$decode_cmd $gpu_opts" --use-gpu yes \
--transform-dir exp/tri3b_ali \
--nj $nj data/train data/lang exp/$dir exp/$dir_ali
steps/nnet2/train_discriminative.sh --cmd "$decode_cmd" \
--num-jobs-nnet 2 --transform-dir exp/tri3b_ali \
--num-threads 1 --parallel-opts "$gpu_opts" data/train data/lang \
exp/$dir_ali exp/$dir_denlats exp/$dir/final.mdl exp/nnet5d_mpe_gpu
# The queue options in this script are for the CLSP network, and might not work
# for you.
for epoch in 1 2 3 4; do
steps/nnet2/decode.sh --config conf/decode.config --cmd "$decode_cmd" --nj 20 --iter epoch$epoch \
--transform-dir exp/tri3b/decode \
exp/tri3b/graph data/test exp/nnet5d_mpe_gpu/decode_epoch$epoch &
if $use_gpu; then
. ./cmd.sh
. ./path.sh
! cuda-compiled && cat <<EOF && exit 1
This script is intended to be used with GPUs but you have not compiled Kaldi with CUDA
If you want to use GPUs (and have them), go to src/, and configure and make on a machine
where "nvcc" is installed.
EOF
align_gpu_opts="-l gpu=1"
use_gpu_flag="--use-gpu yes"
train_parallel_opts="-l gpu=1"
train_num_threads=1
srcdir=exp/nnet4d_gpu
dir=exp/nnet5d_mpe_gpu
nj=$nj_orig
else
align_gpu_opts=
use_gpu_flag="--use-gpu no"
train_parallel_opts="-pe smp 6"
train_num_threads=6
srcdir=exp/nnet4d
dir=exp/nnet5d_mpe
if [ "$decode_cmd" != "run.pl" ]; then
nj=$[$nj_orig*5]; # use more jobs, or it will be slow in the alignment
# phase. But if we are just running everything on
# one machine this won't help us
else
nj=$nj_orig
fi
fi
if [ ! -f $srcdir/final.mdl ]; then
echo "$0: expected $srcdir/final.mdl to exist."
exit 1;
fi
# The denominator lattice creation currently doesn't use GPUs; that would be
# wasteful since the lattice determinization and graph search use up a fair
# amount of CPU, and we'd be idling the GPU much of the time.
# We specify 1G each for the mem_free and ram_free, which is per thread... it
# will likely be less than the default. Increase the beam relative to the
# defaults; this is just for this RM setup, where the default beams will likely
# generate very thin lattices.
# Note: the transform-dir is important to
# specify, since this system is on top of fMLLR features.
steps/nnet2/decode.sh --config conf/decode.config --cmd "$decode_cmd" --nj 20 --iter epoch$epoch \
--transform-dir exp/tri3b/decode_ug \
exp/tri3b/graph_ug data/test exp/nnet5d_mpe_gpu/decode_ug_epoch$epoch &
done
if [ $stage -le 0 ]; then
steps/nnet2/make_denlats.sh --cmd "$decode_cmd -l mem_free=1G,ram_free=1G" \
--nj $nj --sub-split 20 --num-threads 6 --parallel-opts "-pe smp 6" \
--beam 20.0 --lattice-beam 10.0 \
--transform-dir $transform_dir \
data/train data/lang $srcdir ${srcdir}_denlats
fi
if [ $stage -le 1 ]; then
steps/nnet2/align.sh --cmd "$decode_cmd $align_gpu_opts" $use_gpu_flag \
--transform-dir $transform_dir \
--nj $nj data/train data/lang $srcdir ${srcdir}_ali
fi
if [ $stage -le 2 ]; then
steps/nnet2/train_discriminative.sh --cmd "$decode_cmd" \
--num-jobs-nnet 2 --transform-dir $transform_dir \
--num-threads "$train_num_threads" --parallel-opts "$train_parallel_opts" data/train data/lang \
${srcdir}_ali ${srcdir}_denlats $srcdir/final.mdl $dir
fi
if [ $stage -le 3 ]; then
for epoch in 1 2 3 4; do
steps/nnet2/decode.sh --config conf/decode.config --cmd "$decode_cmd" --nj 20 --iter epoch$epoch \
--transform-dir exp/tri3b/decode \
exp/tri3b/graph data/test $dir/decode_epoch$epoch &
steps/nnet2/decode.sh --config conf/decode.config --cmd "$decode_cmd" --nj 20 --iter epoch$epoch \
--transform-dir exp/tri3b/decode_ug \
exp/tri3b/graph_ug data/test $dir/decode_ug_epoch$epoch &
done
wait
fi
exit 0;
......@@ -66,11 +116,11 @@ exit 0;
# acoustic_scale=0.1
# for criterion in smbr mmi mpfe; do
# for drop_frames in true false; do
# nnet-get-egs-discriminative --drop-frames=$drop_frames --criterion=$criterion --excise=true exp/tri5c_mpe/0.mdl 'ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:data/train/split8/1/utt2spk scp:data/train/split8/1/cmvn.scp "scp:head -n 40 data/train/split8/1/feats.scp|" ark:- | splice-feats --left-context=3 --right-context=3 ark:- ark:- | transform-feats exp/tri5c_mpe/final.mat ark:- ark:- | transform-feats --utt2spk=ark:data/train/split8/1/utt2spk ark:exp/tri3b_ali/trans.1 ark:- ark:- |' 'ark,s,cs:gunzip -c exp/$dir_ali/ali.1.gz |' 'ark,s,cs:gunzip -c exp/$dir_denlats/lat.1.gz|' "ark:|nnet-combine-egs-discriminative ark:- ark:1.egs"
# nnet-get-egs-discriminative --drop-frames=$drop_frames --criterion=$criterion --excise=true exp/tri5c_mpe/0.mdl 'ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:data/train/split8/1/utt2spk scp:data/train/split8/1/cmvn.scp "scp:head -n 40 data/train/split8/1/feats.scp|" ark:- | splice-feats --left-context=3 --right-context=3 ark:- ark:- | transform-feats exp/tri5c_mpe/final.mat ark:- ark:- | transform-feats --utt2spk=ark:data/train/split8/1/utt2spk ark:$transform_dir/trans.1 ark:- ark:- |' 'ark,s,cs:gunzip -c exp/${dir}_ali/ali.1.gz |' 'ark,s,cs:gunzip -c exp/${dir}_denlats/lat.1.gz|' "ark:|nnet-combine-egs-discriminative ark:- ark:1.egs"
# nnet-get-egs-discriminative --drop-frames=$drop_frames --criterion=$criterion --split=false --excise=false exp/tri5c_mpe/0.mdl 'ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:data/train/split8/1/utt2spk scp:data/train/split8/1/cmvn.scp "scp:head -n 40 data/train/split8/1/feats.scp|" ark:- | splice-feats --left-context=3 --right-context=3 ark:- ark:- | transform-feats exp/tri5c_mpe/final.mat ark:- ark:- | transform-feats --utt2spk=ark:data/train/split8/1/utt2spk ark:exp/tri3b_ali/trans.1 ark:- ark:- |' 'ark,s,cs:gunzip -c exp/$dir_ali/ali.1.gz |' 'ark,s,cs:gunzip -c exp/$dir_denlats/lat.1.gz|' ark:2.egs
# nnet-get-egs-discriminative --drop-frames=$drop_frames --criterion=$criterion --split=false --excise=false exp/tri5c_mpe/0.mdl 'ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:data/train/split8/1/utt2spk scp:data/train/split8/1/cmvn.scp "scp:head -n 40 data/train/split8/1/feats.scp|" ark:- | splice-feats --left-context=3 --right-context=3 ark:- ark:- | transform-feats exp/tri5c_mpe/final.mat ark:- ark:- | transform-feats --utt2spk=ark:data/train/split8/1/utt2spk ark:$transform_dir/trans.1 ark:- ark:- |' 'ark,s,cs:gunzip -c exp/${dir}_ali/ali.1.gz |' 'ark,s,cs:gunzip -c exp/${dir}_denlats/lat.1.gz|' ark:2.egs
# nnet-compare-hash-discriminative --acoustic-scale=$acoustic_scale --drop-frames=$drop_frames --criterion=$criterion exp/$dir/final.mdl ark:1.egs ark:2.egs || exit 1;
# nnet-compare-hash-discriminative --acoustic-scale=$acoustic_scale --drop-frames=$drop_frames --criterion=$criterion $dir/final.mdl ark:1.egs ark:2.egs || exit 1;
# done
# done
# )
......@@ -21,12 +21,15 @@ if $use_gpu; then
# This one is for training pnorm nnets on top of 40-dim + fMLLR features
# **THIS IS THE PRIMARY RECIPE**
local/nnet2/run_4d.sh --use-gpu true
# as above with 'perturbed training'. A bit better results, a bit slower.
local/nnet2/run_4d2.sh --use-gpu true
# This is discriminative training on top of 4c.
# This is discriminative training on top of 4c. (hardly helps)
local/nnet2/run_5c_gpu.sh
# This is discriminative training on top of 4d.
local/nnet2/run_5d_gpu.sh
local/nnet2/run_5d.sh --use-gpu true
else
# This example runs on top of "raw-fMLLR" features;
# you have to run local/run_raw_fmllr.sh first.
......@@ -42,9 +45,15 @@ else
# **THIS IS THE PRIMARY RECIPE (40-dim + fMLLR + p-norm neural net)**
local/nnet2/run_4d.sh --use-gpu false
# as above with 'perturbed training'. A bit better results, a bit slower.
local/nnet2/run_4d2.sh --use-gpu false
# This is discriminative training on top of 4c.
local/nnet2/run_5c.sh
# This is discriminative training on top of 4d.
local/nnet2/run_5d.sh --use-gpu false
# This is p-norm on top of raw-fMLLR.
#local/nnet2/run_4e.sh
......
......@@ -143,6 +143,15 @@ steps/decode_fmllr.sh --config conf/decode.config --nj 20 --cmd "$decode_cmd" \
steps/align_fmllr.sh --nj 8 --cmd "$train_cmd" --use-graphs true \
data/train data/lang exp/tri3b exp/tri3b_ali
# # We have now added a script that will help you find portions of your data that
# # have bad transcripts, so you can filter them out.  Below we demonstrate how to
# # run this script.
# steps/cleanup/find_bad_utts.sh --nj 20 --cmd "$train_cmd" data/train data/lang \
# exp/tri3b_ali exp/tri3b_cleanup
# # The following command will show you some of the hardest-to-align utterances in the data.
# head exp/tri3b_cleanup/all_info.sorted.txt
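# # Each line of all_info.sorted.txt is: <utt-id> <#errors> <ref-length> <decoded-output> <reference>,
# # sorted so the utterances with the most errors come first.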
## MMI on top of tri3b (i.e. LDA+MLLT+SAT+MMI)
steps/make_denlats.sh --config conf/decode.config \
--nj 8 --cmd "$train_cmd" --transform-dir exp/tri3b_ali \
......
......@@ -56,6 +56,7 @@ echo $nj > $dir/num_jobs
[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1;
cp $srcdir/{tree,final.mdl} $dir || exit 1;
cp $srcdir/final.alimdl $dir 2>/dev/null
cp $srcdir/final.occs $dir;
splice_opts=`cat $srcdir/splice_opts 2>/dev/null` # frame-splicing options.
cp $srcdir/splice_opts $dir 2>/dev/null # frame-splicing options.
......
......@@ -42,6 +42,11 @@ lang=$2
srcdir=$3
dir=$4
for f in $data/text $lang/oov.int $srcdir/tree $srcdir/final.mdl; do
  [ ! -f $f ] && echo "$0: expected file $f to exist" && exit 1;
done
oov=`cat $lang/oov.int` || exit 1;
mkdir -p $dir/log
echo $nj > $dir/num_jobs
......@@ -57,6 +62,7 @@ cp $srcdir/{tree,final.mdl} $dir || exit 1;
cp $srcdir/final.occs $dir;
if [ -f $srcdir/final.mat ]; then feat_type=lda; else feat_type=delta; fi
echo "$0: feature type is $feat_type"
......
#!/bin/bash
# Copyright 2012-2014 Johns Hopkins University (Author: Daniel Povey)
# Apache 2.0
# Decodes the training data using a model with delta or
# LDA+MLLT features.  This version, rather than just using the
# text to align, computes mini-language models (unigram) from the text
# and a few common words in the LM, and allows the decoder to deviate
# from the transcript; utterances whose decoded output differs a lot
# from the transcript probably have bad transcripts.
# Begin configuration section.
nj=4
cmd=run.pl
use_graphs=false
scale_opts="--transition-scale=1.0 --self-loop-scale=0.1"
acoustic_scale=0.1
beam=20.0
lattice_beam=10.0
transform_dir= # directory to find fMLLR transforms in.
top_n_words=100 # Number of common words that we compile into each graph
                # (the most frequent words in $data/text).
stage=0
cleanup=true
# End configuration options.
echo "$0 $@" # Print the command line for logging
[ -f path.sh ] && . ./path.sh # source the path.
. parse_options.sh || exit 1;
if [ $# != 4 ]; then
  echo "usage: $0 <data-dir> <lang-dir> <src-dir> <align-dir>"
  echo "e.g.:  $0 data/train data/lang exp/tri1 exp/tri1_ali"
  echo "main options (for others, see top of script file)"
  echo "  --config <config-file>  # config containing options"
  echo "  --nj <nj>  # number of parallel jobs"
  echo "  --use-graphs true  # use graphs in src-dir"
  echo "  --cmd (utils/run.pl|utils/queue.pl <queue opts>)  # how to run jobs."
  exit 1;
fi
data=$1
lang=$2
srcdir=$3
dir=$4
for f in $data/text $lang/oov.int $srcdir/tree $srcdir/final.mdl \
    $lang/L_disambig.fst $lang/phones/disambig.int; do
  [ ! -f $f ] && echo "$0: expected file $f to exist" && exit 1;
done
oov=`cat $lang/oov.int` || exit 1;
mkdir -p $dir/log
echo $nj > $dir/num_jobs
sdata=$data/split$nj
splice_opts=`cat $srcdir/splice_opts 2>/dev/null` # frame-splicing options.
cp $srcdir/splice_opts $dir 2>/dev/null # frame-splicing options.
cmvn_opts=`cat $srcdir/cmvn_opts 2>/dev/null`
cp $srcdir/cmvn_opts $dir 2>/dev/null # cmn/cmvn option.
[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1;
cp $srcdir/{tree,final.mdl} $dir || exit 1;
cp $srcdir/final.occs $dir;
utils/sym2int.pl --map-oov $oov -f 2- $lang/words.txt <$data/text | \
  awk '{for(x=2;x<=NF;x++) print $x;}' | sort | uniq -c | \
  sort -rn > $dir/word_counts.int || exit 1;
num_words=$(awk '{x+=$1} END{print x}' < $dir/word_counts.int) || exit 1;
# Print the top-n words with their unigram probabilities.
head -n $top_n_words $dir/word_counts.int | \
  awk -v tot=$num_words '{print $1/tot, $2;}' >$dir/top_words.int
utils/int2sym.pl -f 2 $lang/words.txt <$dir/top_words.int >$dir/top_words.txt
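# For illustration (hypothetical values): a line of top_words.int might read
# "0.0312 45", i.e. unigram probability 0.0312 for word-id 45; top_words.txt
# contains the same lines with field 2 mapped back to the word, e.g. "0.0312 THE".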
if [ -f $srcdir/final.mat ]; then feat_type=lda; else feat_type=delta; fi
echo "$0: feature type is $feat_type"
case $feat_type in
  delta) feats="ark,s,cs:apply-cmvn $cmvn_opts --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas ark:- ark:- |";;
  lda) feats="ark,s,cs:apply-cmvn $cmvn_opts --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $srcdir/final.mat ark:- ark:- |"
    cp $srcdir/final.mat $srcdir/full.mat $dir
    ;;
  *) echo "$0: invalid feature type $feat_type" && exit 1;
esac
if [ -z "$transform_dir" ] && [ -f $srcdir/trans.1 ]; then
transform_dir=$srcdir
fi
if [ ! -z "$transform_dir" ]; then
echo "$0: using transforms from $transform_dir"
[ ! -f $transform_dir/trans.1 ] && echo "$0: no such file $transform_dir/trans.1" && exit 1;
nj_orig=$(cat $transform_dir/num_jobs)
if [ $nj -ne $nj_orig ]; then
# Copy the transforms into an archive with an index.
for n in $(seq $nj_orig); do cat $transform_dir/trans.$n; done | \
copy-feats ark:- ark,scp:$dir/trans.ark,$dir/trans.scp || exit 1;
feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk scp:$dir/trans.scp ark:- ark:- |"
else
# number of jobs matches with alignment dir.
feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark:$transform_dir/trans.JOB ark:- ark:- |"
fi
elif [ -f $srcdir/final.alimdl ]; then
echo "$0: **WARNING**: you seem to be using an fMLLR system as input,"
echo " but you are not providing the --transform-dir option during alignment."
fi
echo "$0: decoding $data using utterance-specific decoding graphs using model from $srcdir, output in $dir"
if [ $stage -le 0 ]; then
  rm $dir/edits.*.txt $dir/aligned_ref.*.txt 2>/dev/null
  $cmd JOB=1:$nj $dir/log/decode.JOB.log \
    utils/sym2int.pl --map-oov $oov -f 2- $lang/words.txt $sdata/JOB/text \| \
    steps/cleanup/make_utterance_fsts.pl $dir/top_words.int \| \
    compile-train-graphs-fsts $scale_opts --read-disambig-syms=$lang/phones/disambig.int \
      $dir/tree $dir/final.mdl $lang/L_disambig.fst ark:- ark:- \| \
    gmm-latgen-faster --acoustic-scale=$acoustic_scale --beam=$beam \
      --lattice-beam=$lattice_beam --word-symbol-table=$lang/words.txt \
      $dir/final.mdl ark:- "$feats" ark:- \| \
    lattice-oracle ark:- "ark:utils/sym2int.pl --map-oov $oov -f 2- $lang/words.txt $sdata/JOB/text|" \
      ark,t:- ark,t:$dir/edits.JOB.txt \| \
    utils/int2sym.pl -f 2- $lang/words.txt '>' $dir/aligned_ref.JOB.txt || exit 1;
fi
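# A note on the stage-0 pipeline above: it (1) maps each transcript to integer
# word-ids; (2) builds a per-utterance unigram FST from the transcript plus the
# top-n common words; (3) compiles it into a decoding graph; (4) decodes the
# utterance into a lattice; (5) lattice-oracle finds the lattice path closest
# to the reference, writing per-utterance edit counts to edits.JOB.txt; and
# (6) writes that path, converted back to words, to aligned_ref.JOB.txt.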
if [ $stage -le 1 ]; then
  if [ -f $dir/edits.1.txt ]; then
    for x in $(seq $nj); do cat $dir/edits.$x.txt; done > $dir/edits.txt
    for x in $(seq $nj); do cat $dir/aligned_ref.$x.txt; done > $dir/aligned_ref.txt
  else
    echo "$0: warning: no file $dir/edits.1.txt; using the previously concatenated file if present."
  fi
  # In case any utterances failed to align, get a filtered copy of $data/text
  # containing only the utterances that were decoded.
  utils/filter_scp.pl $dir/edits.txt < $data/text > $dir/text
  cat $dir/text | awk '{print $1, (NF-1);}' > $dir/length.txt

  n1=$(wc -l < $dir/edits.txt)
  n2=$(wc -l < $dir/aligned_ref.txt)
  n3=$(wc -l < $dir/text)
  n4=$(wc -l < $dir/length.txt)
  if [ $n1 -ne $n2 ] || [ $n2 -ne $n3 ] || [ $n3 -ne $n4 ]; then
    echo "$0: mismatch in lengths of files:"
    wc $dir/edits.txt $dir/aligned_ref.txt $dir/text $dir/length.txt
    exit 1;
  fi

  # Note: the format of all_info.txt is:
  # <utterance-id> <number of errors> <reference-length> <decoded-output> <reference>
  # with the fields separated by tabs, e.g.:
  # adg04_sr009_trn  1  12  SHOW THE GRIDLEY+S TRACK IN BRIGHT ORANGE WITH HORNE+S IN DIM RED AT  SHOW THE GRIDLEY+S TRACK IN BRIGHT ORANGE WITH HORNE+S IN DIM RED
  paste $dir/edits.txt \
    <(awk '{print $2}' $dir/length.txt) \
    <(awk '{$1="";print;}' <$dir/aligned_ref.txt) \
    <(awk '{$1="";print;}' <$dir/text) > $dir/all_info.txt
  sort -nr -k2 $dir/all_info.txt > $dir/all_info.sorted.txt
  if $cleanup; then
    rm $dir/edits.*.txt $dir/aligned_ref.*.txt
  fi
fi
#!/usr/bin/perl -w
# Makes unigram decoding-graph FSTs specific to each utterance, combining the
# supplied top-n-words list with the supervision text of that utterance.
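# For example (hypothetical word-ids and probabilities): given a top-words file
# containing the single line "0.5 7", the input line "utt1 8 9" (num_words = 2,
# each transcript word getting probability 1/2) would produce the single-state FST:
#   utt1
#   0 0 7 7 0.693147   (cost = -log(0.5), from the top-words list)
#   0 0 8 8 0.693147   (cost = -log(1/2), from the transcript)
#   0 0 9 9 0.693147
#   0 0.693147         (final cost = -log(1/num_words))
# followed by an empty line, which terminates the FST in text-archive format.
# (Arcs are printed in hash order, so their order may vary.)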
if (@ARGV != 1) {
  print STDERR "Usage: make_utterance_fsts.pl top-words-file.txt < text-archive > fsts-archive\n" .
    "e.g.: utils/sym2int.pl -f 2- data/lang/words.txt data/train/text | \\\n" .
    "  make_utterance_fsts.pl exp/foo/top_words.int | compile-train-graphs-fsts ... \n";
  exit(1);
}
($top_words_file) = @ARGV;
open(F, "<$top_words_file") || die "opening $top_words_file";
%top_word_probs = ( );
while(<F>) {
  @A = split;
  (@A == 2 && $A[0] > 0.0) || die "Bad line $_ in $top_words_file";
  $A[1] =~ m/^[0-9]+$/ || die "Expecting numeric word-ids in $top_words_file: $_\n";
  $top_word_probs{$A[1]} += $A[0];
}
while (<STDIN>) {
  @A = split;
  $utterance_id = shift @A;
  print "$utterance_id\n";
  $num_words = @A + 0;  # length of array @A, i.e. number of words in the transcript.
  %word_probs = %top_word_probs;
  foreach $w (@A) {
    $w =~ m/^[0-9]+$/ || die "Expecting numeric word-ids on stdin: $_";
    $word_probs{$w} += 1.0 / $num_words;
  }
  foreach $w (keys %word_probs) {
    $prob = $word_probs{$w};
    $prob > 0.0 || die "Word $w with bad probability $prob, utterance-id = $utterance_id\n";
    $cost = -log($prob);
    print "0 0 $w $w $cost\n";
  }
  $final_cost = -log(1.0 / $num_words);
  print "0 $final_cost\n";
  print "\n"; # Empty line terminates the FST in the text-archive format.
}
......@@ -4,7 +4,8 @@
# Apache 2.0
# Begin configuration section.
transform_dir=
transform_dir= # this option won't normally be used, but it can be used if you want to
# supply existing fMLLR transforms when decoding.
iter=
model= # You can specify the model to use (e.g. if you want to use the .alimdl)
stage=0
......
......@@ -77,20 +77,31 @@ case $feat_type in
;;
*) echo "$0: invalid feature type $feat_type" && exit 1;
esac
if [ ! -z "$transform_dir" ]; then
if ! [ $nj -eq `cat $transform_dir/num_jobs` ]; then
echo "$0: Number of jobs mismatch with transform-dir: $nj versus `cat $transform_dir/num_jobs`";
echo "$0: using transforms from $transform_dir"
[ ! -s $transform_dir/num_jobs ] && \
echo "$0: expected $transform_dir/num_jobs to contain the number of jobs." && exit 1;
nj_orig=$(cat $transform_dir/num_jobs)
if [ $feat_type == "raw" ]; then trans=raw_trans;
else trans=trans; fi
if [ $feat_type == "lda" ] && ! cmp $transform_dir/final.mat $srcdir/final.mat; then
echo "$0: LDA transforms differ between $srcdir and $transform_dir"
exit 1;
fi
if [ $feat_type == "lda" ]; then
[ ! -f $transform_dir/trans.1 ] && echo "No such file $transform_dir/trans.1" && exit 1;
echo "$0: using transforms from $transform_dir"
feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark:$transform_dir/trans.JOB ark:- ark:- |"
if [ ! -f $transform_dir/$trans.1 ]; then
echo "$0: expected $transform_dir/$trans.1 to exist (--transform-dir option)"
exit 1;