Commit de6498c3 authored by Dan Povey

trunk: add example for multi-language discriminative nnet training to egs/rm/s5/.  Misc minor fixes.

git-svn-id: https://svn.code.sf.net/p/kaldi/code/trunk@4766 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
parent b7c70050
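
The new example is wired into egs/rm/s5 as a pair of scripts: the joint RM+WSJ training recipe and its discriminative-training continuation (see the run.sh hunk further down). A minimal sketch of how they are invoked from egs/rm/s5, assuming the ../../wsj/s5 system that the scripts point at has already been built:

  local/online/run_nnet2_wsj_joint.sh        # multi-language (WSJ + RM) nnet2 training
  local/online/run_nnet2_wsj_joint_disc.sh   # multi-language discriminative training on top of it
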
......@@ -218,6 +218,20 @@ for x in exp/nnet2_online/nnet*/decode*; do grep WER $x/wer_* | utils/best_wer.s
# a shared phone set).'
# Note, I didn't tune the settings of this at all, it was just the first try.
#for x in exp/nnet2_online_wsj/nnet_ms_a_rm_online/decode*; do grep WER $x/wer_* | utils/best_wer.sh ; done
%WER 1.58 [ 198 / 12533, 29 ins, 37 del, 132 sub ] exp/nnet2_online_wsj/nnet_ms_a_rm_online/decode/wer_5
%WER 7.52 [ 943 / 12533, 89 ins, 169 del, 685 sub ] exp/nnet2_online_wsj/nnet_ms_a_rm_online/decode_ug/wer_12
%WER 1.56 [ 196 / 12533, 30 ins, 36 del, 130 sub ] exp/nnet2_online_wsj/nnet_ms_a_rm_online/decode/wer_5
%WER 7.51 [ 941 / 12533, 90 ins, 162 del, 689 sub ] exp/nnet2_online_wsj/nnet_ms_a_rm_online/decode_ug/wer_12
# Discriminative training on top of the previous system (the joint system, with WSJ); we don't gain
# much from it on this particular setup.
for x in exp/nnet2_online_wsj/nnet_ms_a_smbr_0.00005/1/decode_*; do grep WER $x/wer* | utils/best_wer.sh ; done
%WER 1.56 [ 196 / 12533, 30 ins, 36 del, 130 sub ] exp/nnet2_online_wsj/nnet_ms_a_smbr_0.00005/1/decode_epoch0/wer_5
%WER 1.57 [ 197 / 12533, 29 ins, 35 del, 133 sub ] exp/nnet2_online_wsj/nnet_ms_a_smbr_0.00005/1/decode_epoch1/wer_6
%WER 1.55 [ 194 / 12533, 29 ins, 35 del, 130 sub ] exp/nnet2_online_wsj/nnet_ms_a_smbr_0.00005/1/decode_epoch2/wer_6
%WER 1.53 [ 192 / 12533, 32 ins, 30 del, 130 sub ] exp/nnet2_online_wsj/nnet_ms_a_smbr_0.00005/1/decode_epoch3/wer_5
%WER 1.51 [ 189 / 12533, 33 ins, 28 del, 128 sub ] exp/nnet2_online_wsj/nnet_ms_a_smbr_0.00005/1/decode_epoch4/wer_5
%WER 7.51 [ 941 / 12533, 90 ins, 162 del, 689 sub ] exp/nnet2_online_wsj/nnet_ms_a_smbr_0.00005/1/decode_ug_epoch0/wer_12
%WER 7.49 [ 939 / 12533, 89 ins, 150 del, 700 sub ] exp/nnet2_online_wsj/nnet_ms_a_smbr_0.00005/1/decode_ug_epoch1/wer_12
%WER 7.37 [ 924 / 12533, 80 ins, 156 del, 688 sub ] exp/nnet2_online_wsj/nnet_ms_a_smbr_0.00005/1/decode_ug_epoch2/wer_13
%WER 7.33 [ 919 / 12533, 80 ins, 153 del, 686 sub ] exp/nnet2_online_wsj/nnet_ms_a_smbr_0.00005/1/decode_ug_epoch3/wer_13
%WER 7.36 [ 923 / 12533, 85 ins, 148 del, 690 sub ] exp/nnet2_online_wsj/nnet_ms_a_smbr_0.00005/1/decode_ug_epoch4/wer_13
......@@ -18,6 +18,7 @@ train_stage=-10
srcdir=../../wsj/s5/exp/nnet2_online/nnet_ms_a_partial
src_alidir=../../wsj/s5/exp/tri4b_ali_si284 # it's important that this be the alignments
# we actually trained srcdir on.
src_lang=../../wsj/s5/data/lang
dir=exp/nnet2_online_wsj/nnet_ms_a
use_gpu=true
set -e
......@@ -68,10 +69,19 @@ if [ $stage -le 2 ]; then
echo "$0: doing the multilingual training."
# 4 jobs for WSJ, 1 for RM; this affects the data weighting. num-epochs is for
# first one (WSJ)
# first one (WSJ).
# the script said this:
# steps/nnet2/train_multilang2.sh: Will train for 7 epochs (of language 0) = 140 iterations
# steps/nnet2/train_multilang2.sh: 140 iterations is approximately 35 epochs for language 1
# note: the arguments to the --mix-up option are (number of mixtures for WSJ,
# number of mixtures for RM). We just use fairly typical numbers for each
# (although a bit fewer for WSJ, since we're not so concerned about the
# performance of that system).
steps/nnet2/train_multilang2.sh --num-jobs-nnet "4 1" \
--stage $train_stage \
--mix-up "4000 10000" \
--mix-up "10000 4000" \
--cleanup false --num-epochs 7 \
--initial-learning-rate 0.01 --final-learning-rate 0.001 \
--cmd "$train_cmd" --parallel-opts "$parallel_opts" --num-threads "$num_threads" \
......@@ -80,8 +90,12 @@ fi
if [ $stage -le 3 ]; then
# we're just preparing the RM setup for decoding, as we're not that interested
# in the WSJ model. Here, language 0 is WSJ and language 1 is RM.
# Prepare the RM and WSJ setups for decoding, with config files
# (for WSJ, we need the config files for discriminative training).
steps/online/nnet2/prepare_online_decoding_transfer.sh \
${srcdir}_online $src_lang $dir/0 ${dir}_wsj_online
steps/online/nnet2/prepare_online_decoding_transfer.sh \
${srcdir}_online data/lang $dir/1 ${dir}_rm_online
fi
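# Spelling out the arithmetic in the train_multilang2.sh log lines quoted above (this assumes the
# usual nnet2 convention that each job consumes one egs archive per iteration, so the counts are
# only approximate):
#   140 iterations * 4 WSJ jobs = 560 archive passes ~= 7 epochs  -> roughly 80 WSJ archives
#   140 iterations * 1 RM job   = 140 archive passes ~= 35 epochs -> roughly 4 RM archives
# which is how the "4 1" job split sets the relative weighting of the two data sets.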
......
#!/bin/bash
# This script does discriminative training on top of the multi-language
# (WSJ + RM) system trained by ./run_nnet2_wsj_joint.sh, and the
# discriminative training is multi-language too: we make denominator lattices,
# alignments and discriminative examples for both WSJ and RM, and then train
# on them jointly.
# This script is to be run after ./run_nnet2_wsj_joint.sh.
train_stage=-10
stage=0
# dir is the base directory, as in ./run_nnet2_wsj_joint.sh
dir=exp/nnet2_online_wsj/nnet_ms_a
criterion=smbr
drop_frames=false # only relevant for MMI.
learning_rate=0.00005
data_wsj=../../wsj/s5/data/train_si284_max2
data_rm=data/train_max2
lang_wsj=../../wsj/s5/data/lang
lang_rm=data/lang
num_epochs=4
set -e
. cmd.sh
. ./path.sh
. ./utils/parse_options.sh
if [ $stage -le 1 ]; then
nj=30
sub_split=100
num_threads=6
steps/online/nnet2/make_denlats.sh --cmd "$decode_cmd -l mem_free=1G,ram_free=1G -pe smp $num_threads" \
--nj $nj --sub-split $sub_split --num-threads "$num_threads" \
$data_wsj $lang_wsj ${dir}_wsj_online ${dir}_wsj_denlats
fi
if [ $stage -le 2 ]; then
# hardcode no-GPU for alignment, although you could use GPU [you wouldn't
# get excellent GPU utilization though.]
nj=200
use_gpu=no
gpu_opts=
steps/online/nnet2/align.sh --cmd "$decode_cmd $gpu_opts" --use-gpu "$use_gpu" \
--nj $nj $data_wsj $lang_wsj ${dir}_wsj_online ${dir}_wsj_ali
fi
if [ $stage -le 3 ]; then
if [[ $(hostname -f) == *.clsp.jhu.edu ]]; then
date=$(date +'%m_%d_%H_%M')
utils/create_split_dir.pl /export/b0{5,6,7,8}/$USER/kaldi-data/egs/rm-$date/s5/${dir}_wsj_degs/storage ${dir}_wsj_degs/storage
fi
steps/online/nnet2/get_egs_discriminative2.sh \
--cmd "$decode_cmd --max-jobs-run 10" \
--criterion $criterion --drop-frames $drop_frames \
$data_wsj $lang_wsj ${dir}_wsj_{ali,denlats,online,degs}
fi
if [ $stage -le 4 ]; then
nj=30
sub_split=100
num_threads=6
steps/online/nnet2/make_denlats.sh \
--cmd "$decode_cmd -l mem_free=1G,ram_free=1G -pe smp $num_threads" \
--nj $nj --sub-split $sub_split --num-threads "$num_threads" \
$data_rm $lang_rm ${dir}_rm_online ${dir}_rm_denlats
fi
if [ $stage -le 5 ]; then
# hardcode no-GPU for alignment, although you could use GPU [you wouldn't
# get excellent GPU utilization though.]
nj=200
use_gpu=no
gpu_opts=
steps/online/nnet2/align.sh --cmd "$decode_cmd $gpu_opts" --use-gpu "$use_gpu" \
--nj $nj $data_rm $lang_rm ${dir}_rm_online ${dir}_rm_ali
fi
if [ $stage -le 6 ]; then
if [[ $(hostname -f) == *.clsp.jhu.edu ]]; then
date=$(date +'%m_%d_%H_%M')
utils/create_split_dir.pl /export/b0{5,6,7,8}/$USER/kaldi-data/egs/rm-$date/s5/${dir}_rm_degs/storage ${dir}_rm_degs/storage
fi
steps/online/nnet2/get_egs_discriminative2.sh \
--cmd "$decode_cmd --max-jobs-run 10" \
--criterion $criterion --drop-frames $drop_frames \
$data_rm $lang_rm ${dir}_rm_{ali,denlats,online,degs}
fi
if [ $stage -le 7 ]; then
steps/nnet2/train_discriminative_multilang2.sh --cmd "$decode_cmd -l gpu=1" --stage $train_stage \
--learning-rate $learning_rate --num-jobs-nnet "4 1" \
--criterion $criterion --drop-frames $drop_frames \
--num-epochs $num_epochs --num-threads 1 \
${dir}_wsj_degs ${dir}_rm_degs ${dir}_${criterion}_${learning_rate}
fi
if [ $stage -le 8 ]; then
discdir=${dir}_${criterion}_${learning_rate}/1 # RM is directory number 1.
ln -sf $(readlink -f ${dir}_rm_online/conf) $discdir/conf
# ... so it acts like an online-decoding directory.
for epoch in $(seq 0 $num_epochs); do
steps/online/nnet2/decode.sh --config conf/decode.config --cmd "$decode_cmd" --nj 20 \
--iter epoch$epoch exp/tri3b/graph data/test $discdir/decode_epoch$epoch &
steps/online/nnet2/decode.sh --config conf/decode.config --cmd "$decode_cmd" --nj 20 \
--iter epoch$epoch exp/tri3b/graph_ug data/test $discdir/decode_ug_epoch$epoch &
done
wait
fi
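
Since the variables above are declared before utils/parse_options.sh is sourced, each of them can be overridden from the command line; for example, to redo only the final training and decoding stages with a smaller learning rate (the values shown are just an illustration, not tuned):

  local/online/run_nnet2_wsj_joint_disc.sh --stage 7 --learning-rate 0.00002
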
......@@ -188,6 +188,8 @@ steps/decode.sh --config conf/decode.config --nj 20 --cmd "$decode_cmd" \
# ## RM/WSJ training; this doesn't require that the phone set be the same, it's
# ## a demonstration of a multilingual script.
# local/online/run_nnet2_wsj_joint.sh
# ## and the discriminative-training continuation of the above.
# local/online/run_nnet2_wsj_joint_disc.sh
# ## The following is an older way to do multilingual training, from an
# ## already-trained system.
......
......@@ -80,4 +80,3 @@ if [ $stage -le 7 ]; then
fi
exit 0;
......@@ -61,14 +61,11 @@ if [ $# -lt 3 ]; then
echo "Main options (for others, see top of script file)"
echo " --config <config-file> # config file containing options"
echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
echo " --num-epochs <#epochs|4> # Number of epochs of training"
echo " --initial-learning-rate <initial-learning-rate|0.0002> # Learning rate at start of training"
echo " --final-learning-rate <final-learning-rate|0.0004> # Learning rate at end of training"
echo " --num-jobs-nnet <num-jobs|8> # Number of parallel jobs to use for main neural net"
echo " # training (will affect results as well as speed; try 8, 16)"
echo " # Note: if you increase this, you may want to also increase"
echo " # the learning rate. Also note: if there are fewer archives"
echo " # of egs than this, it will get reduced automatically."
echo " --num-epochs <#epochs|4> # Number of epochs of training (measured on language 0)"
echo " --learning-rate <learning-rate|0.0002> # Learning rate to use"
echo " --num-jobs-nnet <num-jobs|4 4> # Number of parallel jobs to use for main neural net:"
echo " # space separated list of num-jobs per language. Affects"
echo " # relative weighting."
echo " --num-threads <num-threads|16> # Number of parallel threads per job (will affect results"
echo " # as well as speed; may interact with batch size; if you increase"
echo " # this, you may want to decrease the batch size. With GPU, must be 1."
......
......@@ -12,6 +12,13 @@
# takes multiple egs directories which must be created by get_egs2.sh, and the
# corresponding alignment directories (only needed for training the transition
# models).
# For the n languages, we share all the hidden layers but have separate
# final layers. On each iteration of training we average the hidden layers
# across all jobs of all languages, but average the parameters of the final,
# output layer only within each language. The script starts from a partially
# trained model from the first language (language 0 in the directory-numbering
# scheme). See egs/rm/s5/local/online/run_nnet2_wsj_joint.sh for an example.
#
# This script requires you to supply a neural net partially trained for the 1st
# language, by one of the regular training scripts, to be used as the initial
......
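
Concretely, with this directory-numbering scheme the multi-language training directory ends up with one numbered subdirectory per language, each holding that language's own output layer on top of the shared hidden layers. In the RM example above this looks roughly as follows (the final.mdl filename is the usual Kaldi convention, shown only as an illustration):

  exp/nnet2_online_wsj/nnet_ms_a/0/final.mdl   # language 0: WSJ
  exp/nnet2_online_wsj/nnet_ms_a/1/final.mdl   # language 1: RM
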
......@@ -52,7 +52,8 @@ foreach my $file (@ARGV) {
# If the symbolic link already exists, leave it in place (don't overwrite it).
if (-l $pseudo_storage) {
unlink($pseudo_storage);
print STDERR "$0: link $pseudo_storage already exists, not overwriting.\n";
next;
}
# Create the destination directory and make the link.
......
......@@ -135,6 +135,9 @@ for (my $x = 1; $x <= 3; $x++) { # This for-loop is to
if ($jobstart > $jobend) {
die "queue.pl: invalid job range $ARGV[0]";
}
if ($jobstart <= 0) {
die "run.pl: invalid job range $ARGV[0], start must be strictly positive (this is a GridEngine limitation).";
}
} elsif ($ARGV[0] =~ m/^([\w_][\w\d_]*)+=(\d+)$/) { # e.g. JOB=1.
$array_job = 1;
$jobname = $1;
......
......@@ -57,6 +57,9 @@ if (@ARGV > 0) {
if ($jobstart > $jobend) {
die "run.pl: invalid job range $ARGV[0]";
}
if ($jobstart <= 0) {
die "run.pl: invalid job range $ARGV[0], start must be strictly positive (this is required for GridEngine compatibility).";
}
} elsif ($ARGV[0] =~ m/^([\w_][\w\d_]*)+=(\d+)$/) { # e.g. JOB=1.
$jobname = $1;
$jobstart = $2;
......
......@@ -79,6 +79,9 @@ for ($x = 1; $x <= 3; $x++) { # This for-loop is to
if ($jobstart > $jobend) {
die "queue.pl: invalid job range $ARGV[0]";
}
if ($jobstart <= 0) {
die "run.pl: invalid job range $ARGV[0], start must be strictly positive (this is required for GridEngine compatibility).";
}
} elsif ($ARGV[0] =~ m/^([\w_][\w\d_]*)+=(\d+)$/) { # e.g. JOB=1.
$jobname = $1;
$jobstart = $2;
......
......@@ -14,7 +14,7 @@ use File::Basename;
# have to be a fully qualified name.
#
# Later we may extend this so that on each line of .queue/machines you
# can specifiy various resources that each machine has, such as how
# can specify various resources that each machine has, such as how
# many slots and how much memory, and make it wait if machines are
# busy. But for now it simply ssh's to a machine from those in the list.
......@@ -59,6 +59,9 @@ if (@ARGV > 0) {
if ($jobstart > $jobend) {
die "run.pl: invalid job range $ARGV[0]";
}
if ($jobstart <= 0) {
die "run.pl: invalid job range $ARGV[0], start must be strictly positive (this is required for GridEngine compatibility)";
}
} elsif ($ARGV[0] =~ m/^([\w_][\w\d_]*)+=(\d+)$/) { # e.g. JOB=1.
$jobname = $1;
$jobstart = $2;
......
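
All three wrappers (queue.pl, run.pl, ssh.pl) accept the same JOB=start:end range syntax used throughout the recipes, so the added check makes a zero-based range fail immediately in each of them: GridEngine array jobs (as submitted by queue.pl) cannot have a task ID of 0, and the other wrappers enforce the same rule for consistency. A small illustration (the log path and command are placeholders):

  utils/run.pl JOB=1:4 exp/foo/log/demo.JOB.log echo done   # valid range
  utils/run.pl JOB=0:3 exp/foo/log/demo.JOB.log echo done   # now dies: start must be >= 1
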
......@@ -538,8 +538,8 @@ function linux_check_dynamic {
echo "Atlas found in $dir";
return 0;
else
echo "... no libatlas.so in $dir";
return 1;
echo "... no libatlas.so in $dir";
return 1;
fi
}
......
......@@ -112,7 +112,7 @@ int main(int argc, char *argv[]) {
}
int32 channel = -1; // means channel info is unspecified.
// if each line has 5 elements then 5th element must be channel identifier
if(split_line.size() == 5) {
if (split_line.size() == 5) {
if (!ConvertStringToInteger(split_line[4], &channel) || channel < 0) {
KALDI_WARN << "Invalid line in segments file [bad channel]: " << line;
continue;
......
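
For reference, the optional fifth field handled here follows the four standard segments-file fields (utterance-id, recording-id, start-time, end-time); a hypothetical segments line that specifies a channel would look like:

  utt0001-A  sw02001  4.21  7.85  1
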
......@@ -176,7 +176,7 @@ int main(int argc, char *argv[]) {
<< " feature files, wrote " << num_egs_written << " examples, "
<< " with " << num_frames_written << " egs in total; "
<< num_err << " files had errors.";
return (num_egs_written == 0 ? 1 : 0);
return (num_egs_written == 0 || num_err > num_done ? 1 : 0);
} catch(const std::exception &e) {
std::cerr << e.what() << '\n';
return -1;
......
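
The stricter return code matters because the calling scripts (like the RM examples above) run under set -e, so a job in which errors outnumber successes now aborts the pipeline instead of quietly producing a truncated set of examples. A minimal sketch of the pattern, with a placeholder binary name and arguments rather than the actual call:

  set -e
  some-egs-extraction-binary scp:feats.scp ark:egs.ark   # a non-zero exit now stops the script here
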