Commit a26d69a4 authored by Daniel Povey

Merge pull request #29 from vijayaditya/ami_sdm

Added TDNN recipe for ami/sdm; corrected a bug in steps/nnet2/get_egs_discriminative2.sh
parents 905e4c61 77494809
@@ -17,3 +17,49 @@ exp/sdm1/tri2a/decode_eval_ami_fsh.o3g.kn.pr1-7/ascore_13/eval_o4.ctm.filt.dtl:P
exp/sdm1/tri3a/decode_eval_ami_fsh.o3g.kn.pr1-7/ascore_12/eval_o4.ctm.filt.dtl:Percent Total Error = 69.5% (62576)
exp/sdm1/tri3a_mmi_b0.1/decode_eval_3.mdl_ami_fsh.o3g.kn.pr1-7/ascore_10/eval_o4.ctm.filt.dtl:Percent Total Error = 67.2% (60447)
# TDNN results
for x in exp/$mic/nnet2_online/*/decode*; do [ -d $x ] && [[ $x =~ "$1" ]] && grep Sum $x/ascore_*/*.sys | utils/best_wer.sh; done
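# (The optional first argument, matched via [[ $x =~ "$1" ]], filters which
# decode directories get scored.)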
# Cross entropy training
%WER 46.8 | 15053 94502 | 59.3 27.6 13.0 6.2 46.8 67.0 | -23.602 | exp/sdm1/nnet2_online/nnet_ms_sp_online/decode_dev_utt/ascore_12/dev_hires_o4.ctm.filt.sys
%WER 46.4 | 14210 94496 | 59.0 26.6 14.4 5.4 46.4 70.7 | -23.844 | exp/sdm1/nnet2_online/nnet_ms_sp_online/decode_dev_utt_offline/ascore_13/dev_hires_o4.ctm.filt.sys
%WER 50.7 | 13180 89643 | 54.7 29.6 15.7 5.3 50.7 72.6 | -23.104 | exp/sdm1/nnet2_online/nnet_ms_sp_online/decode_eval_utt/ascore_12/eval_hires_o4.ctm.filt.sys
%WER 50.5 | 13099 89806 | 54.7 29.3 15.9 5.2 50.5 73.5 | -23.149 | exp/sdm1/nnet2_online/nnet_ms_sp_online/decode_eval_utt_offline/ascore_12/eval_hires_o4.ctm.filt.sys
# sMBR training
# dev set
# epoch 0
%WER 46.8 | 15053 94502 | 59.3 27.6 13.0 6.2 46.8 67.0 | -23.602 | exp/sdm1/nnet2_online/nnet_ms_sp_smbr_0.000005/decode_epoch0_dev_utt/ascore_12/dev_hires_o4.ctm.filt.sys
%WER 46.4 | 14210 94496 | 59.0 26.6 14.4 5.4 46.4 70.7 | -23.844 | exp/sdm1/nnet2_online/nnet_ms_sp_smbr_0.000005/decode_epoch0_dev_utt_offline/ascore_13/dev_hires_o4.ctm.filt.sys
#epoch 1
%WER 45.7 | 14207 94490 | 59.5 22.9 17.6 5.3 45.7 70.5 | -24.681 | exp/sdm1/nnet2_online/nnet_ms_sp_smbr_0.000005/decode_epoch1_dev_utt/ascore_11/dev_hires_o4.ctm.filt.sys
%WER 45.9 | 15232 94491 | 59.9 23.2 17.0 5.7 45.9 65.9 | -24.541 | exp/sdm1/nnet2_online/nnet_ms_sp_smbr_0.000005/decode_epoch1_dev_utt_offline/ascore_10/dev_hires_o4.ctm.filt.sys
#epoch 2
%WER 45.9 | 14543 94497 | 59.3 22.5 18.2 5.3 45.9 68.8 | -24.748 | exp/sdm1/nnet2_online/nnet_ms_sp_smbr_0.000005/decode_epoch2_dev_utt/ascore_12/dev_hires_o4.ctm.filt.sys
%WER 46.1 | 14125 94492 | 59.6 22.7 17.7 5.7 46.1 71.1 | -24.626 | exp/sdm1/nnet2_online/nnet_ms_sp_smbr_0.000005/decode_epoch2_dev_utt_offline/ascore_11/dev_hires_o4.ctm.filt.sys
#epoch 3
%WER 46.0 | 15128 94502 | 59.6 23.1 17.3 5.6 46.0 66.2 | -24.565 | exp/sdm1/nnet2_online/nnet_ms_sp_smbr_0.000005/decode_epoch3_dev_utt/ascore_12/dev_hires_o4.ctm.filt.sys
%WER 46.2 | 14764 94498 | 59.3 22.3 18.4 5.5 46.2 68.0 | -24.723 | exp/sdm1/nnet2_online/nnet_ms_sp_smbr_0.000005/decode_epoch3_dev_utt_offline/ascore_12/dev_hires_o4.ctm.filt.sys
#epoch 4
%WER 46.1 | 15193 94485 | 58.5 21.4 20.1 4.6 46.1 65.8 | -25.114 | exp/sdm1/nnet2_online/nnet_ms_sp_smbr_0.000005/decode_epoch4_dev_utt/ascore_15/dev_hires_o4.ctm.filt.sys
%WER 46.5 | 15169 94494 | 59.2 22.8 18.0 5.7 46.5 66.3 | -24.554 | exp/sdm1/nnet2_online/nnet_ms_sp_smbr_0.000005/decode_epoch4_dev_utt_offline/ascore_12/dev_hires_o4.ctm.filt.sys
# eval set
#epoch 0
%WER 50.7 | 13180 89643 | 54.7 29.6 15.7 5.3 50.7 72.6 | -23.104 | exp/sdm1/nnet2_online/nnet_ms_sp_smbr_0.000005/decode_epoch0_eval_utt/ascore_12/eval_hires_o4.ctm.filt.sys
%WER 50.5 | 13099 89806 | 54.7 29.3 15.9 5.2 50.5 73.5 | -23.149 | exp/sdm1/nnet2_online/nnet_ms_sp_smbr_0.000005/decode_epoch0_eval_utt_offline/ascore_12/eval_hires_o4.ctm.filt.sys
#epoch 1
%WER 49.3 | 13432 89977 | 55.4 25.2 19.4 4.7 49.3 70.7 | -23.885 | exp/sdm1/nnet2_online/nnet_ms_sp_smbr_0.000005/decode_epoch1_eval_utt/ascore_12/eval_hires_o4.ctm.filt.sys
%WER 49.2 | 13497 89975 | 55.5 24.9 19.5 4.7 49.2 70.7 | -23.937 | exp/sdm1/nnet2_online/nnet_ms_sp_smbr_0.000005/decode_epoch1_eval_utt_offline/ascore_12/eval_hires_o4.ctm.filt.sys
#epoch 2
%WER 49.2 | 13372 89987 | 55.6 25.3 19.0 4.9 49.2 71.0 | -23.850 | exp/sdm1/nnet2_online/nnet_ms_sp_smbr_0.000005/decode_epoch2_eval_utt/ascore_13/eval_hires_o4.ctm.filt.sys
%WER 48.9 | 13318 89796 | 55.9 25.2 18.9 4.8 48.9 71.3 | -23.901 | exp/sdm1/nnet2_online/nnet_ms_sp_smbr_0.000005/decode_epoch2_eval_utt_offline/ascore_13/eval_hires_o4.ctm.filt.sys
#epoch 3
%WER 49.0 | 14307 89984 | 55.7 25.3 19.0 4.8 49.0 66.3 | -23.885 | exp/sdm1/nnet2_online/nnet_ms_sp_smbr_0.000005/decode_epoch3_eval_utt/ascore_14/eval_hires_o4.ctm.filt.sys
%WER 48.9 | 14084 89798 | 55.9 25.3 18.8 4.8 48.9 67.4 | -23.884 | exp/sdm1/nnet2_online/nnet_ms_sp_smbr_0.000005/decode_epoch3_eval_utt_offline/ascore_14/eval_hires_o4.ctm.filt.sys
#epoch 4
%WER 49.1 | 13948 89977 | 55.6 25.2 19.2 4.8 49.1 68.2 | -23.902 | exp/sdm1/nnet2_online/nnet_ms_sp_smbr_0.000005/decode_epoch4_eval_utt/ascore_15/eval_hires_o4.ctm.filt.sys
%WER 49.0 | 14259 89798 | 55.8 25.4 18.8 4.8 49.0 66.6 | -23.873 | exp/sdm1/nnet2_online/nnet_ms_sp_smbr_0.000005/decode_epoch4_eval_utt_offline/ascore_15/eval_hires_o4.ctm.filt.sys
# config for high-resolution MFCC features, intended for neural network training
# Note: we keep all cepstra, so it has the same info as filterbank features,
# but MFCC is more easily compressible (because less correlated) which is why
# we prefer this method.
--use-energy=false # use average of log energy, not energy.
--num-mel-bins=40 # similar to Google's setup.
--num-ceps=40 # there is no dimensionality reduction.
--low-freq=20 # low cutoff frequency for mel bins... this is high-bandwidth data, so
# there might be some information at the low end.
--high-freq=-400 # high cutoff frequency, relative to Nyquist of 8000 (=7600)
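# A usage sketch (job count and directory names are illustrative, not part of
# this commit): features with this config would typically be dumped via the
# standard wrappers, e.g.
#   steps/make_mfcc.sh --nj 70 --mfcc-config conf/mfcc_hires.conf \
#     --cmd "$train_cmd" data/$mic/train_hires exp/make_hires/$mic mfcc_hires
#   steps/compute_cmvn_stats.sh data/$mic/train_hires exp/make_hires/$mic mfcc_hires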
# configuration file for apply-cmvn-online, used in the script ../local/run_online_decoding.sh
# Default configuration
command qsub -v PATH -cwd -S /bin/bash -j y -l arch=*64*
option mem=* -l mem_free=$0,ram_free=$0
option mem=0 # Do not add anything to qsub_opts
option num_threads=* -pe smp $0
option num_threads=1 # Do not add anything to qsub_opts
option max_jobs_run=* -tc $0
default gpu=0
option gpu=0 -q all.q
option gpu=* -l gpu=$0 -q g.q
default allow_k20=true
option allow_k20=true
option allow_k20=false -l 'hostname=!g01*&!g02*&!b06*'
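# A hypothetical invocation (log path and command are illustrative) showing how
# queue.pl maps generic switches onto the options above:
#   queue.pl --config conf/queue_no_k20.conf --gpu 1 --allow-k20 false \
#     exp/test/log/check_gpu.log nvidia-smi
# Here --gpu 1 expands (via "option gpu=*") to "-l gpu=1 -q g.q", and
# --allow-k20 false adds the hostname exclusion.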
@@ -18,7 +18,9 @@ has_fisher=true
mic=ihm
nj=70
affix=
hidden_dim=950
num_threads_ubm=32
use_sat_alignments=true
. ./path.sh
. ./utils/parse_options.sh
@@ -33,6 +35,16 @@ EOF
parallel_opts="--gpu 1"
num_threads=1
minibatch_size=512
if [[ $(hostname -f) == *.clsp.jhu.edu ]]; then
parallel_opts="$parallel_opts --config conf/queue_no_k20.conf --allow-k20 false"
# that config is like the default config in the text of queue.pl, but adding the following lines.
# default allow_k20=true
# option allow_k20=true
# option allow_k20=false -l 'hostname=!g01*&!g02*&!b06*'
# It's a workaround for an NVidia CUDA library bug in our currently installed
# version of the CUDA toolkit that only shows up on K20s.
fi
# the optional _$affix suffix is in case I want to change the parameters.
else
# Use 4 nnet jobs just like run_4d_gpu.sh so the results should be
@@ -43,12 +55,21 @@ else
fi
dir=exp/$mic/nnet2_online/nnet_ms_sp${affix:+_$affix}
if [ "$use_sat_alignments" == "true" ] ; then
gmm_dir=exp/$mic/tri4a
align_script=steps/align_fmllr.sh
else
gmm_dir=exp/$mic/tri3a
align_script=steps/align_si.sh
fi
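# (SAT/fMLLR alignments go with the ihm setup, where we adapt to the speaker;
# for sdm/mdm we only normalize per environment, so speaker-independent
# alignments are used instead.)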
final_lm=`cat data/local/lm/final_lm`
LM=$final_lm.pr1-7
graph_dir=$gmm_dir/graph_${LM}
# Run the common stages of training, including training the iVector extractor
local/online/run_nnet2_common.sh --stage $stage --mic $mic \
--use-sat-alignments $use_sat_alignments \
--num-threads-ubm $num_threads_ubm || exit 1;
if [ $stage -le 6 ]; then
@@ -70,8 +91,8 @@ if [ $stage -le 6 ]; then
fi
if [ $stage -le 7 ]; then
$align_script --nj $nj --cmd "$train_cmd" \
data/$mic/train_sp data/lang $gmm_dir ${gmm_dir}_sp_ali || exit 1
fi
if [ $stage -le 8 ]; then
@@ -118,9 +139,9 @@ if [ $stage -le 10 ]; then
--initial-effective-lrate 0.0015 --final-effective-lrate 0.00015 \
--cmd "$decode_cmd" \
--egs-dir "$common_egs_dir" \
--pnorm-input-dim $hidden_dim \
--pnorm-output-dim $hidden_dim \
data/$mic/train_hires_sp data/lang ${gmm_dir}_sp_ali $dir || exit 1;
fi
if [ $stage -le 11 ]; then
......
#!/bin/bash
# This script does discriminative training on top of the online, multi-splice
# system trained in run_nnet2_ms.sh.
# Note: this relies on having a cluster with plenty of CPUs as well as GPUs,
# since the lattice generation runs at about real time, so it takes on the
# order of 1000 hours of CPU time.
#
# Note: rather than using any features we have dumped on disk, this script
# regenerates them from the wav data three times: when we do lattice
# generation, numerator alignment, and discriminative training. This made the
# script easier to write and more generic, because we don't have to know where
# the features and the iVectors are, but of course it's a little inefficient.
# The time taken is dominated by the lattice generation anyway, so this isn't
# a huge deal.
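# Example invocation (this mirrors how run.sh calls this script for the sdm
# setup; see the TDNN stage there):
#   local/online/run_nnet2_ms_sp_disc.sh --mic sdm1 \
#     --gmm-dir exp/sdm1/tri3a --srcdir exp/sdm1/nnet2_online/nnet_ms_sp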
stage=0
use_gpu=true
criterion=smbr
drop_frames=false # only matters for MMI anyway.
effective_lrate=0.000005
srcdir=
mic=ihm
num_jobs_nnet=6
train_stage=-10 # can be used to start training in the middle.
decode_start_epoch=0 # can be used to avoid decoding all epochs, e.g. if we decided to run more.
num_epochs=4
cleanup=false # run with --cleanup true --stage 6 to clean up (remove large things like denlats,
# alignments and degs).
gmm_dir=exp/$mic/tri4a
set -e
. cmd.sh
. ./path.sh
. ./utils/parse_options.sh
if $use_gpu; then
if ! cuda-compiled; then
cat <<EOF && exit 1
This script is intended to be used with GPUs but you have not compiled Kaldi with CUDA
If you want to use GPUs (and have them), go to src/, and configure and make on a machine
where "nvcc" is installed. Otherwise, call this script with --use-gpu false
EOF
fi
parallel_opts=" -l gpu=1,hostname='!g01*&!g02*' " #we want to submit to all.q as we use multiple GPUs for this
num_threads=1
else
# Use 4 nnet jobs just like run_4d_gpu.sh so the results should be
# almost the same, but this may be a little bit slow.
num_threads=16
parallel_opts="-pe smp $num_threads"
fi
if [ -z "$srcdir" ]; then
srcdir=exp/$mic/nnet2_online/nnet_ms_sp
fi
if [ ! -f ${srcdir}_online/final.mdl ]; then
echo "$0: expected ${srcdir}_online/final.mdl to exist; first run run_nnet2_ms.sh."
exit 1;
fi
final_lm=`cat data/local/lm/final_lm`
LM=$final_lm.pr1-7
graph_dir=$gmm_dir/graph_${LM}
if [ $stage -le 1 ]; then
nj=50 # this doesn't really affect anything strongly, except the num-jobs for one of
# the phases of get_egs_discriminative2.sh below.
num_threads_denlats=6
subsplit=40 # number of jobs that run per job (but 2 run at a time, so total jobs is 80,
# giving max total slots = 80 * 6 = 480).
steps/nnet2/make_denlats.sh --cmd "$decode_cmd -l mem_free=1G,ram_free=1G -pe smp $num_threads_denlats" \
--online-ivector-dir exp/$mic/nnet2_online/ivectors_train_hires_sp2 \
--nj $nj --sub-split $subsplit --num-threads "$num_threads_denlats" --config conf/decode.conf \
data/$mic/train_hires_sp data/lang $srcdir ${srcdir}_denlats || exit 1;
fi
if [ $stage -le 2 ]; then
# hardcode no-GPU for alignment, although you could use GPU [you wouldn't
# get excellent GPU utilization though.]
nj=76 # have a high number of jobs because this could take a while, and we might
# have some stragglers.
use_gpu=no
gpu_opts=
steps/nnet2/align.sh --cmd "$decode_cmd $gpu_opts" --use-gpu "$use_gpu" \
--online-ivector-dir exp/$mic/nnet2_online/ivectors_train_hires_sp2 \
--nj $nj data/$mic/train_hires_sp data/lang $srcdir ${srcdir}_ali || exit 1;
# the command below is a more generic, but slower, way to do it.
# steps/online/nnet2/align.sh --cmd "$decode_cmd $gpu_opts" --use-gpu "$use_gpu" \
# --nj $nj data/train_hires data/lang ${srcdir}_online ${srcdir}_ali || exit 1;
fi
if [ $stage -le 3 ]; then
if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d ${srcdir}_degs/storage ]; then
utils/create_split_dir.pl \
/export/b0{1,2,5,6}/$USER/kaldi-data/egs/ami-${mic}-$(date +'%m_%d_%H_%M')/s5/${srcdir}_degs/storage ${srcdir}_degs/storage
fi
# have a higher maximum num-jobs if the degs are being dumped to distributed storage.
if [ -d ${srcdir}_degs/storage ]; then max_jobs=10; else max_jobs=5; fi
steps/nnet2/get_egs_discriminative2.sh \
--stage 0 \
--cmd "$decode_cmd -tc $max_jobs" \
--online-ivector-dir exp/$mic/nnet2_online/ivectors_train_hires_sp2 \
--criterion $criterion --drop-frames $drop_frames \
data/$mic/train_hires_sp data/lang ${srcdir}{_ali,_denlats,/final.mdl,_degs} || exit 1;
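# (The brace expansion above passes ${srcdir}_ali, ${srcdir}_denlats,
# ${srcdir}/final.mdl and ${srcdir}_degs as the last four arguments.)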
# the command below is a more generic, but slower, way to do it.
#steps/online/nnet2/get_egs_discriminative2.sh \
# --cmd "$decode_cmd -tc $max_jobs" \
# --criterion $criterion --drop-frames $drop_frames \
# data/train_hires data/lang ${srcdir}{_ali,_denlats,_online,_degs} || exit 1;
fi
if [ $stage -le 4 ]; then
steps/nnet2/train_discriminative2.sh --cmd "$decode_cmd $parallel_opts" \
--stage $train_stage \
--effective-lrate $effective_lrate \
--criterion $criterion --drop-frames $drop_frames \
--num-epochs $num_epochs \
--num-jobs-nnet $num_jobs_nnet --num-threads $num_threads \
${srcdir}_degs ${srcdir}_${criterion}_${effective_lrate} || exit 1;
fi
if [ $stage -le 5 ]; then
dir=${srcdir}_${criterion}_${effective_lrate}
ln -sf $(readlink -f ${srcdir}_online/conf) $dir/conf # so it acts like an online-decoding directory
for epoch in $(seq $decode_start_epoch $num_epochs); do
for decode_set in dev eval; do
(
num_jobs=`cat data/$mic/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l`
decode_dir=$dir/decode_epoch${epoch}_${decode_set}_utt
steps/online/nnet2/decode.sh --config conf/decode.conf --cmd "$decode_cmd" --nj $num_jobs \
--per-utt true --iter epoch$epoch $graph_dir data/$mic/${decode_set}_hires $decode_dir || exit 1
) &
done
done
for epoch in $(seq $decode_start_epoch $num_epochs); do
for decode_set in dev eval; do
(
num_jobs=`cat data/$mic/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l`
decode_dir=$dir/decode_epoch${epoch}_${decode_set}_utt_offline
steps/online/nnet2/decode.sh --config conf/decode.conf --cmd "$decode_cmd" --nj $num_jobs \
--per-utt true --online false --iter epoch$epoch $graph_dir data/$mic/${decode_set}_hires $decode_dir || exit 1
) &
done
done
wait
fi
if [ $stage -le 6 ] && $cleanup; then
# if you run with "--cleanup true --stage 6" you can clean up.
rm ${srcdir}_denlats/lat.*.gz || true
rm ${srcdir}_ali/ali.*.gz || true
steps/nnet2/remove_egs.sh ${srcdir}_degs || true
fi
exit 0;
@@ -174,7 +174,16 @@ fi
# TDNN training.
if [ $stage -le 13 ]; then
local/online/run_nnet2_ms_perturbed.sh \
--mic $mic \
--hidden-dim 950 \
--splice-indexes "layer0/-2:-1:0:1:2 layer1/-1:2 layer2/-3:3 layer3/-7:2 layer4/-3:3" \
--use-sat-alignments true
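# (In the multi-splice TDNN setup, --splice-indexes lists, per layer, the frame
# offsets spliced at that layer's input; e.g. "layer0/-2:-1:0:1:2" splices
# frames t-2 through t+2 at the network input.)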
local/online/run_nnet2_ms_sp_disc.sh \
--mic $mic \
--gmm-dir exp/$mic/tri4a \
--srcdir exp/$mic/nnet2_online/nnet_ms_sp
fi
echo "Done!"
@@ -38,7 +38,6 @@ if [ $stage -le 2 ]; then
local/ami_sdm_scoring_data_prep.sh $AMI_DIR $micid dev
local/ami_sdm_scoring_data_prep.sh $AMI_DIR $micid eval
fi
# Here starts the normal recipe, which is mostly shared across mic scenarios,
# - for ihm we adapt to speaker by fMLLR,
# - for sdm and mdm we do not adapt for speaker, but for environment only (cmn),
@@ -155,7 +154,20 @@ if [ $stage -le 12 ]; then
local/nnet/run_dnn_lda_mllt.sh $mic
fi
echo "Done!"
# TDNN training.
if [ $stage -le 13 ]; then
local/online/run_nnet2_ms_perturbed.sh \
--mic $mic \
--hidden-dim 850 \
--splice-indexes "layer0/-2:-1:0:1:2 layer1/-1:2 layer2/-3:3 layer3/-7:2 layer4/-3:3" \
--use-sat-alignments false
local/online/run_nnet2_ms_sp_disc.sh \
--mic $mic \
--gmm-dir exp/$mic/tri3a \
--srcdir exp/$mic/nnet2_online/nnet_ms_sp
fi
echo "Done."
# By default we do not build systems adapted to sessions for AMI in distant scenarios
......
@@ -91,8 +91,10 @@ utils/split_data.sh $data $nj
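# The prior-adjustment egs get their own rspecifier: when the number of
# alignment jobs matches the number of den-lat jobs we can read the per-job
# ali archives directly; otherwise the alignments are merged into a single
# indexed archive below.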
if [ $nj_ali -eq $nj ]; then
ali_rspecifier="ark,s,cs:gunzip -c $alidir/ali.JOB.gz |"
prior_ali_rspecifier="ark,s,cs:gunzip -c $alidir/ali.JOB.gz | copy-int-vector ark:- ark,t:- | utils/filter_scp.pl $dir/priors_uttlist | ali-to-pdf $alidir/final.mdl ark,t:- ark:- |"
else
ali_rspecifier="scp:$dir/ali.scp"
prior_ali_rspecifier="ark,s,cs:utils/filter_scp.pl $dir/priors_uttlist $dir/ali.scp | ali-to-pdf $alidir/final.mdl scp:- ark:- |"
if [ $stage -le 1 ]; then
echo "$0: number of jobs in den-lats versus alignments differ: dumping them as single archive and index."
all_ids=$(seq -s, $nj_ali)
@@ -266,7 +268,7 @@ echo "$0: dumping egs for prior adjustment in the background."
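# Use $prior_ali_rspecifier (set above according to whether the job counts
# match) rather than a hard-coded per-job archive; this is the bug fix in
# get_egs_discriminative2.sh mentioned in the commit message.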
$cmd JOB=1:$nj $dir/log/create_priors_subset.JOB.log \
nnet-get-egs $ivectors_opt $nnet_context_opts "$priors_feats" \
"ark,s,cs:gunzip -c $alidir/ali.JOB.gz | copy-int-vector ark:- ark,t:- | utils/filter_scp.pl $dir/priors_uttlist | ali-to-pdf $alidir/final.mdl ark,t:- ark:- | ali-to-post ark:- ark:- |" \
"$prior_ali_rspecifier ali-to-post ark:- ark:- |" \
ark:- \| nnet-copy-egs ark:- $priors_egs_list || \
{ touch $dir/.error; echo "Error in creating priors subset. See $dir/log/create_priors_subset.*.log"; exit 1; }
......