Commit 43594ae3 authored by Vijayaditya Peddinti

egs/ami: Added TDNN recipe for AMI IHM task.

git-svn-id: https://svn.code.sf.net/p/kaldi/code/trunk@5218 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
parent 1cb64585
@@ -3,6 +3,7 @@
for x in exp/ihm/{mono,tri,sgmm,nnet,dnn,lstm}*/decode*; do [ -d $x ] && [[ $x =~ "$1" ]] && grep WER $x/wer_* | utils/best_wer.sh; done 2>/dev/null
# sclite / asclite:
for x in exp/ihm/{mono,tri,sgmm,nnet,dnn,lstm}*/decode*; do [ -d $x ] && [[ $x =~ "$1" ]] && grep Sum $x/ascore_*/*.sys | utils/best_wer.sh; done 2>/dev/null
exit 0
dev
@@ -18,3 +19,13 @@ exp/ihm/tri4a/decode_eval_ami_fsh.o3g.kn.pr1-7/ascore_13/eval_o4.ctm.filt.dtl:Pe
exp/ihm/tri4a_mmi_b0.1/decode_eval_3.mdl_ami_fsh.o3g.kn.pr1-7/ascore_12/eval_o4.ctm.filt.dtl:Percent Total Error = 31.7% (28518)
# TDNN results
for x in exp/$mic/nnet2_online/*/decode*; do [ -d $x ] && [[ $x =~ "$1" ]] && grep Sum $x/ascore_*/*.sys | utils/best_wer.sh; done
#dev
%WER 25.0 | 13098 94483 | 78.3 12.0 9.6 3.4 25.0 57.7 | exp/ihm/nnet2_online/nnet_ms_sp_online/decode_dev/ascore_13/dev_hires.ctm.filt.sys
%WER 25.3 | 13098 94468 | 78.5 12.7 8.8 3.8 25.3 57.9 | exp/ihm/nnet2_online/nnet_ms_sp_online/decode_dev_utt/ascore_12/dev_hires.ctm.filt.sys
%WER 25.0 | 13098 94476 | 78.5 12.4 9.1 3.6 25.0 58.0 | exp/ihm/nnet2_online/nnet_ms_sp_online/decode_dev_utt_offline/ascore_13/dev_hires.ctm.filt.sys
#eval
%WER 25.9 | 12643 89971 | 77.2 14.2 8.6 3.2 25.9 56.4 | exp/ihm/nnet2_online/nnet_ms_sp_online/decode_eval/ascore_12/eval_hires.ctm.filt.sys
%WER 26.0 | 12643 89976 | 77.1 14.7 8.2 3.2 26.0 55.7 | exp/ihm/nnet2_online/nnet_ms_sp_online/decode_eval_utt/ascore_12/eval_hires.ctm.filt.sys
%WER 25.8 | 12643 89978 | 77.6 14.6 7.8 3.4 25.8 55.8 | exp/ihm/nnet2_online/nnet_ms_sp_online/decode_eval_utt_offline/ascore_11/eval_hires.ctm.filt.sys
#!/bin/bash
# This script contains the common (shared) parts of the run_nnet*.sh scripts.
. cmd.sh
stage=0
mic=ihm
num_threads_ubm=32
set -e
. cmd.sh
. ./path.sh
. ./utils/parse_options.sh
if [ $stage -le 1 ]; then
# Create high-resolution MFCC features (with 40 cepstra instead of 13).
# This shows how you can split data across multiple file-systems; we'll split the
# MFCC dir across multiple locations. If you have multiple copies of Kaldi checked
# out and run the same recipe, be careful not to let them overwrite each other.
mfccdir=mfcc_${mic}_hires
if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $mfccdir/storage ]; then
utils/create_split_dir.pl /export/b0{1,2,3,4}/$USER/kaldi-data/egs/ami-$mic-$(date +'%m_%d_%H_%M')/s5/$mfccdir/storage $mfccdir/storage
fi
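# Illustrative note (not run; the /export/b0{1,2,3,4} paths above are site-specific):
# roughly speaking, create_split_dir.pl creates $mfccdir/storage containing numbered
# symlinks (1, 2, 3, ...) that point at the listed real directories, and make_mfcc.sh
# then spreads its feature archives across those subdirectories.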
for datadir in train dev eval; do
utils/copy_data_dir.sh data/$mic/$datadir data/$mic/${datadir}_hires
if [ "$datadir" == "train" ]; then
dir=data/$mic/train_hires
cat $dir/wav.scp | python -c "
import sys, os, subprocess, re, random
scale_low = 1.0/8
scale_high = 2.0
for line in sys.stdin.readlines():
if len(line.strip()) == 0:
continue
print '{0} sox --vol {1} -t wav - -t wav - |'.format(line.strip(), random.uniform(scale_low, scale_high))
"| sort -k1,1 -u > $dir/wav.scp_scaled || exit 1;
mv $dir/wav.scp $dir/wav.scp_nonorm
mv $dir/wav.scp_scaled $dir/wav.scp
fi
steps/make_mfcc.sh --nj 70 --mfcc-config conf/mfcc_hires.conf \
--cmd "$train_cmd" data/$mic/${datadir}_hires exp/make_${mic}_hires/$datadir $mfccdir || exit 1;
steps/compute_cmvn_stats.sh data/$mic/${datadir}_hires exp/make_${mic}_hires/$datadir $mfccdir || exit 1;
done
fi
if [ $stage -le 2 ]; then
# Train a system just for its LDA+MLLT transform. We use --num-iters 13
# because after we get the transform (12th iter is the last), any further
# training is pointless.
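# Note (descriptive only): per the usual train_lda_mllt.sh interface, the two numeric
# arguments below are the number of tree leaves (5000) and the total number of
# Gaussians (10000).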
steps/train_lda_mllt.sh --cmd "$train_cmd" --num-iters 13 \
--realign-iters "" \
--splice-opts "--left-context=3 --right-context=3" \
5000 10000 data/$mic/train_hires data/lang \
exp/$mic/tri4a_ali exp/$mic/nnet2_online/tri5
fi
if [ $stage -le 3 ]; then
mkdir -p exp/$mic/nnet2_online
steps/online/nnet2/train_diag_ubm.sh --cmd "$train_cmd" --nj 30 --num-frames 700000 \
--num-threads $num_threads_ubm \
data/$mic/train_hires 512 exp/$mic/nnet2_online/tri5 exp/$mic/nnet2_online/diag_ubm
fi
if [ $stage -le 4 ]; then
# iVector extractors can in general be sensitive to the amount of data, but
# this one has a fairly small dim (defaults to 100)
steps/online/nnet2/train_ivector_extractor.sh --cmd "$train_cmd" --nj 10 \
data/$mic/train_hires exp/$mic/nnet2_online/diag_ubm exp/$mic/nnet2_online/extractor || exit 1;
fi
if [ $stage -le 5 ]; then
ivectordir=exp/$mic/nnet2_online/ivectors_train_hires
if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $ivectordir/storage ]; then
utils/create_split_dir.pl /export/b0{1,2,3,4}/$USER/kaldi-data/egs/ami-$mic-$(date +'%m_%d_%H_%M')/s5/$ivectordir/storage $ivectordir/storage
fi
# We extract iVectors on all the train data, which will be what we train the
# system on. With --utts-per-spk-max 2, the script pairs the utterances
# into twos, and treats each of these pairs as one speaker. Note that these
# are extracted 'online'.
# Having a larger number of speakers is helpful for generalization, and to
# handle per-utterance decoding well (the iVector starts at zero).
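# Illustrative example (pseudo-speaker naming is hypothetical; the actual ids are
# whatever copy_data_dir.sh generates): a speaker FEE041 with utterances u1 u2 u3 u4
# would appear in train_hires_max2 as two pseudo-speakers in spk2utt, e.g.
#   FEE041-1 u1 u2
#   FEE041-2 u3 u4
# so each "speaker" contributes at most 2 utterances to the iVector statistics.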
steps/online/nnet2/copy_data_dir.sh --utts-per-spk-max 2 data/$mic/train_hires data/$mic/train_hires_max2
steps/online/nnet2/extract_ivectors_online.sh --cmd "$train_cmd" --nj 30 \
data/$mic/train_hires_max2 exp/$mic/nnet2_online/extractor exp/$mic/nnet2_online/ivectors_train_hires || exit 1;
fi
exit 0;
#!/bin/bash
# Copyright 2013 Johns Hopkins University (author: Daniel Povey)
# 2014 Tom Ko
# 2014 Vijay Peddinti
# Apache 2.0
# This example script demonstrates how speed perturbation of the data helps nnet training, in the AMI IHM setup.
. ./cmd.sh
set -e
stage=1
train_stage=-10
use_gpu=true
splice_indexes="layer0/-2:-1:0:1:2 layer1/-1:2 layer2/-3:3 layer3/-7:2 layer4/-3:3"
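# Rough reading of the splice string above (a back-of-the-envelope worked example):
# layer0 splices input frames at offsets -2..+2, layer1 splices hidden activations at
# -1,+2, layer2 at -3,+3, layer3 at -7,+2 and layer4 at -3,+3. Summing the extreme
# offsets, the network effectively sees about 2+1+3+7+3 = 16 frames of left context
# and 2+2+3+2+3 = 12 frames of right context at the input.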
common_egs_dir=
has_fisher=true
mic=ihm
nj=70
affix=
num_threads_ubm=32
. ./path.sh
. ./utils/parse_options.sh
if $use_gpu; then
if ! cuda-compiled; then
cat <<EOF && exit 1
This script is intended to be used with GPUs but you have not compiled Kaldi with CUDA
If you want to use GPUs (and have them), go to src/, and configure and make on a machine
where "nvcc" is installed. Otherwise, call this script with --use-gpu false
EOF
fi
parallel_opts="--gpu 1"
num_threads=1
minibatch_size=512
# the _a is in case I want to change the parameters.
else
# Use 4 nnet jobs just like run_4d_gpu.sh so the results should be
# almost the same, but this may be a little bit slow.
num_threads=16
minibatch_size=128
parallel_opts="-pe smp $num_threads"
fi
dir=exp/$mic/nnet2_online/nnet_ms_sp${affix:+_$affix}
final_lm=`cat data/local/lm/final_lm`
LM=$final_lm.pr1-7
graph_dir=exp/$mic/tri4a/graph_${LM}
# Run the common stages of training, including training the iVector extractor
local/online/run_nnet2_common.sh --stage $stage --mic $mic \
--num-threads-ubm $num_threads_ubm || exit 1;
if [ $stage -le 6 ]; then
# Although the nnet will be trained on high-resolution data, we still have to perturb the normal-resolution data to get the alignments
# _sp stands for speed-perturbed
utils/perturb_data_dir_speed.sh 0.9 data/$mic/train data/$mic/temp1
utils/perturb_data_dir_speed.sh 1.0 data/$mic/train data/$mic/temp2
utils/perturb_data_dir_speed.sh 1.1 data/$mic/train data/$mic/temp3
utils/combine_data.sh --extra-files utt2uniq data/$mic/train_sp data/$mic/temp1 data/$mic/temp2 data/$mic/temp3
rm -r data/$mic/temp1 data/$mic/temp2 data/$mic/temp3
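# Illustrative note (exact id prefixes may differ by Kaldi version):
# perturb_data_dir_speed.sh makes a copy of the data dir in which each utterance and
# speaker id is prefixed with the speed factor (e.g. "sp0.9-<utt-id>"), wav.scp gains
# a "sox ... speed 0.9 |" stage, and utt2uniq maps each perturbed id back to its
# original utterance so combine_data.sh above can keep the copies tied together.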
mfccdir=mfcc_${mic}_perturbed
for x in train_sp; do
steps/make_mfcc.sh --cmd "$train_cmd" --nj $nj \
data/$mic/$x exp/make_${mic}_mfcc/$x $mfccdir || exit 1;
steps/compute_cmvn_stats.sh data/$mic/$x exp/make_${mic}_mfcc/$x $mfccdir || exit 1;
done
utils/fix_data_dir.sh data/$mic/train_sp
fi
if [ $stage -le 7 ]; then
steps/align_fmllr.sh --nj $nj --cmd "$train_cmd" \
data/$mic/train_sp data/lang exp/$mic/tri4a exp/$mic/tri4a_sp_ali || exit 1
fi
if [ $stage -le 8 ]; then
# Now perturb the high-resolution data
utils/perturb_data_dir_speed.sh 0.9 data/$mic/train_hires data/temp1
utils/perturb_data_dir_speed.sh 1.0 data/$mic/train_hires data/temp2
utils/perturb_data_dir_speed.sh 1.1 data/$mic/train_hires data/temp3
utils/combine_data.sh --extra-files utt2uniq data/$mic/train_hires_sp data/temp1 data/temp2 data/temp3
rm -r data/temp1 data/temp2 data/temp3
mfccdir=mfcc_${mic}_perturbed
for x in train_hires_sp; do
steps/make_mfcc.sh --cmd "$train_cmd" --nj $nj --mfcc-config conf/mfcc_hires.conf \
data/$mic/$x exp/make_${mic}_hires/$x $mfccdir || exit 1;
steps/compute_cmvn_stats.sh data/$mic/$x exp/make_${mic}_hires/$x $mfccdir || exit 1;
done
utils/fix_data_dir.sh data/$mic/train_hires_sp
fi
if [ $stage -le 9 ]; then
# We extract iVectors on all the train data, which will be what we
# train the system on.
# Having a larger number of speakers is helpful for generalization, and to
# handle per-utterance decoding well (the iVector starts at zero).
steps/online/nnet2/copy_data_dir.sh --utts-per-spk-max 2 data/$mic/train_hires_sp data/$mic/train_hires_sp_max2
steps/online/nnet2/extract_ivectors_online.sh --cmd "$train_cmd" --nj $nj \
data/$mic/train_hires_sp_max2 exp/$mic/nnet2_online/extractor exp/$mic/nnet2_online/ivectors_train_hires_sp2 || exit 1;
fi
if [ $stage -le 10 ]; then
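# A note on the learning-rate options used below: the "effective" rate is roughly the
# rate you would get after averaging the models across parallel jobs, so the per-job
# rate used inside the training script is effective-rate * num-jobs. With the values
# below that is about 0.0015 * 2 = 0.003 at the start and 0.00015 * 12 = 0.0018 at the
# end (a rough reading of the options, not an exact statement of the script internals).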
steps/nnet2/train_multisplice_accel2_fix.sh --stage $train_stage \
--num-epochs 3 --num-jobs-initial 2 --num-jobs-final 12 \
--num-hidden-layers 6 --splice-indexes "$splice_indexes" \
--feat-type raw \
--online-ivector-dir exp/$mic/nnet2_online/ivectors_train_hires_sp2 \
--cmvn-opts "--norm-means=false --norm-vars=false" \
--num-threads "$num_threads" \
--minibatch-size "$minibatch_size" \
--parallel-opts "$parallel_opts" \
--io-opts "--max-jobs-run 12" \
--add-layers-period 1 \
--initial-effective-lrate 0.0015 --final-effective-lrate 0.00015 \
--cmd "$decode_cmd" \
--egs-dir "$common_egs_dir" \
--pnorm-input-dim 950 \
--pnorm-output-dim 950 \
data/$mic/train_hires_sp data/lang exp/$mic/tri4a_sp_ali $dir || exit 1;
fi
if [ $stage -le 11 ]; then
# If this setup used PLP features, we'd have to give the option --feature-type plp
# to the script below.
steps/online/nnet2/prepare_online_decoding.sh --mfcc-config conf/mfcc_hires.conf \
data/lang exp/$mic/nnet2_online/extractor "$dir" ${dir}_online || exit 1;
fi
wait;
if [ $stage -le 12 ]; then
# do the actual online decoding with iVectors, carrying info forward from
# previous utterances of the same speaker.
for decode_set in dev eval; do
(
num_jobs=`cat data/$mic/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l`
decode_dir=${dir}_online/decode_${decode_set}
steps/online/nnet2/decode.sh --config conf/decode.conf --cmd "$decode_cmd" --nj $num_jobs \
$graph_dir data/$mic/${decode_set}_hires $decode_dir || exit 1;
) &
done
fi
if [ $stage -le 13 ]; then
# this version of the decoding treats each utterance separately
# without carrying forward speaker information.
for decode_set in dev eval; do
(
num_jobs=`cat data/$mic/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l`
decode_dir=${dir}_online/decode_${decode_set}_utt
steps/online/nnet2/decode.sh --config conf/decode.conf --cmd "$decode_cmd" --nj $num_jobs \
--per-utt true $graph_dir data/$mic/${decode_set}_hires $decode_dir || exit 1;
) &
done
fi
if [ $stage -le 14 ]; then
# this version of the decoding treats each utterance separately
# without carrying forward speaker information, but looks to the end
# of the utterance while computing the iVector (--online false)
for decode_set in dev eval; do
(
num_jobs=`cat data/$mic/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l`
decode_dir=${dir}_online/decode_${decode_set}_utt_offline
steps/online/nnet2/decode.sh --config conf/decode.conf --cmd "$decode_cmd" --nj $num_jobs \
--per-utt true --online false $graph_dir data/$mic/${decode_set}_hires \
$decode_dir || exit 1;
) &
done
fi
wait;
exit 0;
@@ -172,4 +172,9 @@ if [ $stage -le 12 ]; then
local/nnet/run_dnn.sh $mic
fi
# TDNN training.
if [ $stage -le 13 ]; then
local/online/run_nnet2_ms_perturbed.sh --mic $mic
fi
echo "Done!"
#!/bin/bash
# Copyright 2012-2014 Johns Hopkins University (Author: Daniel Povey).
# 2013 Xiaohui Zhang
# 2013 Guoguo Chen
# 2014 Vimal Manohar
# 2014 Vijayaditya Peddinti
# Apache 2.0.
# train_multisplice_accel2.sh is a modified version of
# train_pnorm_multisplice2.sh (still using pnorm). The "accel" refers to the
# fact that we increase the number of jobs during training (from
# --num-jobs-initial to --num-jobs-final). We dropped "pnorm" from the name as
# it was getting too long.
# Begin configuration section.
cmd=run.pl
num_epochs=15 # Number of epochs of training;
# the number of iterations is worked out from this.
initial_effective_lrate=0.01
final_effective_lrate=0.001
bias_stddev=0.5
pnorm_input_dim=3000
pnorm_output_dim=300
minibatch_size=128 # by default use a smallish minibatch size for neural net
# training; this controls instability which would otherwise
# be a problem with multi-threaded update.
samples_per_iter=400000 # each iteration of training, see this many samples
# per job. This option is passed to get_egs.sh
num_jobs_initial=1 # Number of neural net jobs to run in parallel at the start of training
num_jobs_final=8 # Number of neural net jobs to run in parallel at the end of training
prior_subset_size=10000 # 10k samples per job, for computing priors. Should be
# more than enough.
num_jobs_compute_prior=10 # these are single-threaded, run on CPU.
get_egs_stage=0
fix_nnet=true
min_average=0.05
max_average=0.95
online_ivector_dir=
remove_egs=false # set to false to disable removing egs.
max_models_combine=20 # The "max_models_combine" is the maximum number of models we give
# to the final 'combine' stage, but these models will themselves be averages of
# iteration-number ranges.
shuffle_buffer_size=5000 # This "buffer_size" variable controls randomization of the samples
# on each iter. You could set it to 0 or to a large value for complete
# randomization, but this would both consume memory and cause spikes in
# disk I/O. Smaller is easier on disk and memory but less random. It's
# not a huge deal though, as samples are anyway randomized right at the start.
# (the point of this is to get data in different minibatches on different iterations,
# since in the preconditioning method, 2 samples in the same minibatch can
# affect each others' gradients.)
add_layers_period=2 # by default, add new layers every 2 iterations.
num_hidden_layers=3
stage=-4
exit_stage=-100 # you can set this to terminate the training early. Exits before running this stage
splice_indexes="layer0/-4:-3:-2:-1:0:1:2:3:4 layer2/-5:-1:3"
# Format : layer<hidden_layer>/<frame_indices>....layer<hidden_layer>/<frame_indices> "
# note: hidden layers may be composed of one or more components,
# so hidden layer indexing is different from the component count
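# Worked example of the default splice string above (a rough reading, not machine-checked
# here): "layer0/-4:-3:-2:-1:0:1:2:3:4 layer2/-5:-1:3" splices 9 consecutive input frames
# before layer 0 and hidden activations at offsets -5,-1,+3 before layer 2, so the total
# input context is about 4+5 = 9 frames on the left and 4+3 = 7 frames on the right.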
io_opts="-tc 5" # for jobs with a lot of I/O; limits the number running at one time.
randprune=4.0 # speeds up LDA.
alpha=4.0 # relates to preconditioning.
update_period=4 # relates to online preconditioning: says how often we update the subspace.
num_samples_history=2000 # relates to online preconditioning
max_change_per_sample=0.075
precondition_rank_in=20 # relates to online preconditioning
precondition_rank_out=80 # relates to online preconditioning
mix_up=0 # Number of components to mix up to (should be > #tree leaves, if
# specified.)
num_threads=16
parallel_opts="-pe smp 16 -l ram_free=1G,mem_free=1G"
# by default we use 16 threads; this lets the queue know.
# note: parallel_opts doesn't automatically get adjusted if you adjust num-threads.
combine_num_threads=8
combine_parallel_opts="-pe smp 8" # queue options for the "combine" stage.
cleanup=true
egs_dir=
lda_opts=
lda_dim=
egs_opts=
transform_dir= # If supplied, overrides alidir
cmvn_opts= # will be passed to get_lda.sh and get_egs.sh, if supplied.
# only relevant for "raw" features, not lda.
feat_type= # Can be used to force "raw" features.
align_cmd= # The cmd that is passed to steps/nnet2/align.sh
align_use_gpu= # Passed to use_gpu in steps/nnet2/align.sh [yes/no]
realign_times= # List of times at which we realign. Each time is a
# floating point number strictly between 0 and 1, which
# will be multiplied by the num-iters to get an iteration
# number.
num_jobs_align=30 # Number of jobs for realignment
# End configuration section.
frames_per_eg=8 # to be passed on to get_egs2.sh
trap 'for pid in $(jobs -pr); do kill -KILL $pid; done' INT QUIT TERM
echo "$0 $@" # Print the command line for logging
if [ -f path.sh ]; then . ./path.sh; fi
. parse_options.sh || exit 1;
if [ $# != 4 ]; then
echo "Usage: $0 [opts] <data> <lang> <ali-dir> <exp-dir>"
echo " e.g.: $0 data/train data/lang exp/tri3_ali exp/tri4_nnet"
echo ""
echo "Main options (for others, see top of script file)"
echo " --config <config-file> # config file containing options"
echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
echo " --num-epochs <#epochs|15> # Number of epochs of training"
echo " --initial-effective-lrate <lrate|0.02> # effective learning rate at start of training."
echo " --final-effective-lrate <lrate|0.004> # effective learning rate at end of training."
echo " # data, 0.00025 for large data"
echo " --num-hidden-layers <#hidden-layers|2> # Number of hidden layers, e.g. 2 for 3 hours of data, 4 for 100hrs"
echo " --add-layers-period <#iters|2> # Number of iterations between adding hidden layers"
echo " --mix-up <#pseudo-gaussians|0> # Can be used to have multiple targets in final output layer,"
echo " # per context-dependent state. Try a number several times #states."
echo " --num-jobs-initial <num-jobs|1> # Number of parallel jobs to use for neural net training, at the start."
echo " --num-jobs-final <num-jobs|8> # Number of parallel jobs to use for neural net training, at the end"
echo " --num-threads <num-threads|16> # Number of parallel threads per job (will affect results"
echo " # as well as speed; may interact with batch size; if you increase"
echo " # this, you may want to decrease the batch size."
echo " --parallel-opts <opts|\"-pe smp 16 -l ram_free=1G,mem_free=1G\"> # extra options to pass to e.g. queue.pl for processes that"
echo " # use multiple threads... note, you might have to reduce mem_free,ram_free"
echo " # versus your defaults, because it gets multiplied by the -pe smp argument."
echo " --io-opts <opts|\"-tc 10\"> # Options given to e.g. queue.pl for jobs that do a lot of I/O."
echo " --minibatch-size <minibatch-size|128> # Size of minibatch to process (note: product with --num-threads"
echo " # should not get too large, e.g. >2k)."
echo " --samples-per-iter <#samples|400000> # Number of samples of data to process per iteration, per"
echo " # process."
echo " --splice-indexes <string|layer0/-4:-3:-2:-1:0:1:2:3:4> "
echo " # Frame indices used for each splice layer."
echo " # Format : layer<hidden_layer_index>/<frame_indices>....layer<hidden_layer>/<frame_indices> "
echo " # (note: we splice processed, typically 40-dimensional frames"
echo " --lda-dim <dim|''> # Dimension to reduce spliced features to with LDA"
echo " --realign-epochs <list-of-epochs|''> # A list of space-separated epoch indices the beginning of which"
echo " # realignment is to be done"
echo " --align-cmd (utils/run.pl|utils/queue.pl <queue opts>) # passed to align.sh"
echo " --align-use-gpu (yes/no) # specify is gpu is to be used for realignment"
echo " --num-jobs-align <#njobs|30> # Number of jobs to perform realignment"
echo " --stage <stage|-4> # Used to run a partially-completed training process from somewhere in"
echo " # the middle."
exit 1;
fi
data=$1
lang=$2
alidir=$3
dir=$4
if [ ! -z "$realign_times" ]; then
[ -z "$align_cmd" ] && echo "$0: realign_times specified but align_cmd not specified" && exit 1
[ -z "$align_use_gpu" ] && echo "$0: realign_times specified but align_use_gpu not specified" && exit 1
fi
# Check some files.
for f in $data/feats.scp $lang/L.fst $alidir/ali.1.gz $alidir/final.mdl $alidir/tree; do
[ ! -f $f ] && echo "$0: no such file $f" && exit 1;
done
# Set some variables.
num_leaves=`tree-info $alidir/tree 2>/dev/null | grep num-pdfs | awk '{print $2}'` || exit 1
[ -z $num_leaves ] && echo "\$num_leaves is unset" && exit 1
[ "$num_leaves" -eq "0" ] && echo "\$num_leaves is 0" && exit 1
nj=`cat $alidir/num_jobs` || exit 1; # number of jobs in alignment dir...
# in this dir we'll have just one job.
sdata=$data/split$nj
utils/split_data.sh $data $nj
mkdir -p $dir/log
echo $nj > $dir/num_jobs
cp $alidir/tree $dir
# process the splice_inds string, to get a layer-wise context string
# to be processed by the nnet-components
# this would be mainly used by SpliceComponent|SpliceMaxComponent
python steps/nnet2/make_multisplice_configs.py contexts --splice-indexes "$splice_indexes" $dir || exit -1;
context_string=$(cat $dir/vars) || exit -1
echo $context_string
eval $context_string || exit -1;
# initializes variables used by get_lda.sh and get_egs2.sh:
# get_lda.sh : first_left_context, first_right_context
# get_egs2.sh : nnet_left_context & nnet_right_context
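# Illustrative example (the exact contents of $dir/vars are whatever the python script
# writes; the variable names are taken from the comment above): the file might contain
# lines like
#   first_left_context=2; first_right_context=2;
#   nnet_left_context=16; nnet_right_context=12;
# which the eval above turns into shell variables for the later stages.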
extra_opts=()
[ ! -z "$cmvn_opts" ] && extra_opts+=(--cmvn-opts "$cmvn_opts")
[ ! -z "$feat_type" ] && extra_opts+=(--feat-type $feat_type)
[ ! -z "$online_ivector_dir" ] && extra_opts+=(--online-ivector-dir $online_ivector_dir)
[ -z "$transform_dir" ] && transform_dir=$alidir
extra_opts+=(--transform-dir $transform_dir)
if [ $stage -le -4 ]; then
echo "$0: calling get_lda.sh"
steps/nnet2/get_lda.sh $lda_opts "${extra_opts[@]}" --left-context $first_left_context --right-context $first_right_context --cmd "$cmd" $data $lang $alidir $dir || exit 1;
fi
# these files will have been written by get_lda.sh
feat_dim=$(cat $dir/feat_dim) || exit 1;
ivector_dim=$(cat $dir/ivector_dim) || exit 1;
lda_dim=$(cat $dir/lda_dim) || exit 1;
if [ $stage -le -3 ] && [ -z "$egs_dir" ]; then
extra_opts+=(--left-context $nnet_left_context )
extra_opts+=(--right-context $nnet_right_context )
echo "$0: calling get_egs2.sh"
steps/nnet2/get_egs2.sh $egs_opts "${extra_opts[@]}" \
--samples-per-iter $samples_per_iter --stage $get_egs_stage \
--io-opts "$io_opts" \
--cmd "$cmd" $egs_opts \
--frames-per-eg $frames_per_eg \
$data $alidir $dir/egs || exit 1;
fi
if [ -z $egs_dir ]; then
egs_dir=$dir/egs
else
# confirm that the provided egs_dir has the necessary context
egs_left_context=$(cat $egs_dir/info/left_context) || exit -1
egs_right_context=$(cat $egs_dir/info/right_context) || exit -1
echo $egs_left_context $nnet_left_context $egs_right_context $nnet_right_context
([[ $egs_left_context -lt $nnet_left_context ]] || [[ $egs_right_context -lt $nnet_right_context ]]) &&
echo "Provided egs_dir $egs_dir does not have sufficient context to train the neural network." && exit -1;
fi
frames_per_eg=$(cat $egs_dir/info/frames_per_eg) || { echo "error: no such file $egs_dir/info/frames_per_eg"; exit 1; }
num_archives=$(cat $egs_dir/info/num_archives) || { echo "error: no such file $egs_dir/info/num_archives"; exit 1; }
# num_archives_expanded considers each separate label-position from
# 0..frames_per_eg-1 to be a separate archive.
num_archives_expanded=$[$num_archives*$frames_per_eg]
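# For example (illustrative numbers only): 120 archives with frames_per_eg=8 gives
# 120 * 8 = 960 expanded archives, so --num-jobs-final may not exceed 960.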
[ $num_jobs_initial -gt $num_jobs_final ] && \
echo "$0: --num-jobs-initial cannot exceed --num-jobs-final" && exit 1;
[ $num_jobs_final -gt $num_archives_expanded ] && \
echo "$0: --num-jobs-final cannot exceed #archives $num_archives_expanded." && exit 1;
if ! [ $num_hidden_layers -ge 1 ]; then
echo "Invalid num-hidden-layers $num_hidden_layers"
exit 1
fi
if [ $stage -le -2 ]; then
echo "$0: initializing neural net";
lda_mat=$dir/lda.mat
tot_input_dim=$[$feat_dim+$ivector_dim]
online_preconditioning_opts="alpha=$alpha num-samples-history=$num_samples_history update-period=$update_period rank-in=$precondition_rank_in rank-out=$precondition_rank_out max-change-per-sample=$max_change_per_sample"
initial_lrate=$(perl -e "print ($initial_effective_lrate*$num_jobs_initial);")
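# Why multiply: each of the $num_jobs_initial parallel jobs trains on its own data and
# the models are then averaged, which scales the effective step size down by roughly
# the number of jobs, so multiplying the effective rate by the job count keeps the
# per-job rate consistent (e.g. with the defaults here, 0.01 * 1 = 0.01).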
# create the config files for nnet initialization
python steps/nnet2/make_multisplice_configs.py \
--splice-indexes "$splice_indexes" \
--total-input-dim $tot_input_dim \
--ivector-dim $ivector_dim \
--lda-mat "$lda_mat" \
--lda-dim $lda_dim \
--pnorm-input-dim $pnorm_input_dim \
--pnorm-output-dim $pnorm_output_dim \
--online-preconditioning-opts "$online_preconditioning_opts" \
--initial-learning-rate $initial_lrate \
--bias-stddev $bias_stddev \
--num-hidden-layers $num_hidden_layers \
--num-targets $num_leaves \
configs $dir || exit -1;
$cmd $dir/log/nnet_init.log \
nnet-am-init $alidir/tree $lang/topo "nnet-init $dir/nnet.config -|" \
$dir/0.mdl || exit 1;
fi
#if [ $pnorm_input_dim -eq $pnorm_output_dim ]; then fix_nnet=true;fi
if [ $stage -le -1 ]; then
echo "Training transition probabilities and setting priors"
$cmd $dir/log/train_trans.log \
nnet-train-transitions $dir/0.mdl "ark:gunzip -c $alidir/ali.*.gz|" $dir/0.mdl \
|| exit 1;
fi
# set num_iters so that as close as possible, we process the data $num_epochs
# times, i.e. ($num_iters*$avg_num_jobs) == $num_epochs*$num_archives_expanded,