Commit 7d8da8bb authored by Dan Povey

sandbox/online: merging changes from trunk.

git-svn-id: https://svn.code.sf.net/p/kaldi/code/sandbox/online@4318 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
parents a2192216 851318fa
......@@ -144,13 +144,6 @@ steps/train_mpe.sh --cmd "$train_cmd" data/train data/lang exp/tri5a_ali exp/tr
steps/decode.sh --cmd "$decode_cmd" --nj 10 --config conf/decode.config \
--transform-dir exp/tri5a/decode \
exp/tri5a/graph data/dev exp/tri5a_mpe/decode || exit 1 ;
# Do MCE.
steps/train_mce.sh --cmd "$train_cmd" data/train data/lang exp/tri5a_ali exp/tri5a_denlats exp/tri5a_mce || exit 1;
steps/decode.sh --cmd "$decode_cmd" --nj 10 --config conf/decode.config \
--transform-dir exp/tri5a/decode \
exp/tri5a/graph data/dev exp/tri5a_mce/decode || exit 1 ;
# getting results (see RESULTS file)
for x in exp/*/decode; do [ -d $x ] && grep Sum $x/score_*/*.sys | utils/best_wer.sh; done 2>/dev/null
......
......@@ -15,6 +15,8 @@ fi
steps/nnet2/decode.sh --nj 10 --cmd "$decode_cmd" --transform-dir exp/tri3b/decode \
--config conf/decode.config exp/tri3b/graph data/test exp/dnn4b_nnet2/decode
steps/nnet2/decode.sh --nj 10 --cmd "$decode_cmd" --transform-dir exp/tri3b/decode \
--config conf/decode.config exp/tri3b/graph_ug data/test exp/dnn4b_nnet2/decode_ug
# decoding results are essentially the same (any small difference is probably because
# decode.config != decode_dnn.config).
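# (Sketch, not part of the original script: to see exactly what differs between the two
# configs mentioned above, one could simply run
#   diff conf/decode.config conf/decode_dnn.config
# assuming both files live under conf/, and check whether any decoding parameters such as
# the beams differ.)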
......@@ -42,6 +44,7 @@ steps/nnet2/decode.sh --nj 10 --cmd "$decode_cmd" \
# %WER 5.13 [ 643 / 12533, 82 ins, 144 del, 417 sub ] exp/dnn4b_nnet2_raw_no_cmvn/decode/wer_6
steps/online/nnet2/prepare_online_decoding.sh data/lang \
exp/dnn4b_nnet2_raw_no_cmvn exp/dnn4b_nnet2_raw_no_cmvn_online
......@@ -54,3 +57,77 @@ steps/online/nnet2/decode.sh --config conf/decode.config --cmd "$decode_cmd" --n
# It's slightly better than the offline decoding and I'm not sure why, as all the decoding
# parameters seem to be the same. It may be some slight difference in how the lattices
# are determinized.
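# A sketch (not in the original script) for putting the offline and online numbers side by
# side, reusing the best_wer.sh idiom from this file; the online decode directory name is
# an assumption based on the _online directory prepared above:
# for d in exp/dnn4b_nnet2_raw_no_cmvn/decode exp/dnn4b_nnet2_raw_no_cmvn_online/decode; do
#   [ -d $d ] && grep WER $d/wer_* | utils/best_wer.sh
# done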
( # We demonstrate doing further training on top of a model initially
# trained by Karel's tools.
nnet-am-switch-preconditioning exp/dnn4b_nnet2/final.mdl - | \
nnet-am-copy --learning-rate=0.001 - exp/dnn4b_nnet2/final.mdl.mod
mkdir -p exp/dnn4b_nnet2_retrain
steps/nnet2/get_egs.sh --samples-per-iter 200000 \
--num-jobs-nnet 4 --splice-width 5 --cmd "$train_cmd" \
data/train data/lang exp/tri3b_ali \
exp/dnn4b_nnet2_retrain
# options here are for GPU use.
steps/nnet2/train_more.sh --learning-rate-factor 0.1 --cmd "$train_cmd" \
--parallel-opts "-l gpu=1" --num-threads 1 --minibatch-size 512 \
exp/dnn4b_nnet2/final.mdl.mod exp/dnn4b_nnet2_retrain/egs exp/dnn4b_nnet2_retrain
steps/nnet2/decode.sh --nj 30 --cmd "$decode_cmd" --transform-dir exp/tri3b/decode \
--config conf/decode.config exp/tri3b/graph data/test exp/dnn4b_nnet2_retrain/decode
steps/nnet2/decode.sh --nj 30 --cmd "$decode_cmd" --transform-dir exp/tri3b/decode \
--config conf/decode.config exp/tri3b/graph_ug data/test exp/dnn4b_nnet2_retrain/decode_ug
#Results for this experiment:
#for x in exp/dnn4b_nnet2_retrain/decode*; do grep WER $x/wer_* | utils/best_wer.sh ; done
#%WER 1.58 [ 198 / 12533, 29 ins, 38 del, 131 sub ] exp/dnn4b_nnet2_retrain/decode/wer_3
#%WER 7.60 [ 953 / 12533, 56 ins, 168 del, 729 sub ] exp/dnn4b_nnet2_retrain/decode_ug/wer_10
# vs. the following baseline (our experiment got 0.2% abs. improvement on unigram only).
#for x in exp/dnn4b_nnet2/decode*; do grep WER $x/wer_* | utils/best_wer.sh ; done
# %WER 1.58 [ 198 / 12533, 22 ins, 45 del, 131 sub ] exp/dnn4b_nnet2/decode/wer_3
#%WER 7.80 [ 977 / 12533, 83 ins, 151 del, 743 sub ] exp/dnn4b_nnet2/decode_ug/wer_6
)
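# A sketch (assumption, not part of the original recipe) of a quick sanity check after the
# nnet-am-copy step above: dump the model info and confirm the learning rates were really
# set to 0.001 before retraining.
# nnet-am-info exp/dnn4b_nnet2/final.mdl.mod | grep -i learning-rate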
(
# We demonstrate doing further training on top of a DBN trained
# generatively by Karel's tools.
mkdir -p exp/dnn4b_nnet2_dbn_in
for f in final.mdl final.feature_transform ali_train_pdf.counts; do
cp exp/dnn4b_pretrain-dbn_dnn/$f exp/dnn4b_nnet2_dbn_in/
done
cp exp/dnn4b_pretrain-dbn/6.dbn exp/dnn4b_nnet2_dbn_in/final.dbn
steps/nnet2/convert_nnet1_to_nnet2.sh exp/dnn4b_nnet2_dbn_in exp/dnn4b_nnet2_dbn
cp exp/tri3b/splice_opts exp/tri3b/cmvn_opts exp/tri3b/final.mat exp/tri3b/tree exp/dnn4b_nnet2_dbn/
nnet-am-switch-preconditioning exp/dnn4b_nnet2_dbn/final.mdl - | \
nnet-am-copy --learning-rate=0.01 - exp/dnn4b_nnet2_dbn/final.mdl.mod
steps/nnet2/get_egs.sh --samples-per-iter 200000 \
--num-jobs-nnet 4 --splice-width 5 --cmd "$train_cmd" \
data/train data/lang exp/tri3b_ali \
exp/dnn4b_nnet2_dbn_retrain
steps/nnet2/train_more.sh --learning-rate-factor 0.1 --cmd "$train_cmd" \
--parallel-opts "-l gpu=1" --num-threads 1 --minibatch-size 512 \
exp/dnn4b_nnet2_dbn/final.mdl.mod exp/dnn4b_nnet2_dbn_retrain/egs exp/dnn4b_nnet2_dbn_retrain
steps/nnet2/decode.sh --nj 30 --cmd "$decode_cmd" --transform-dir exp/tri3b/decode \
--config conf/decode.config exp/tri3b/graph data/test exp/dnn4b_nnet2_dbn_retrain/decode &
steps/nnet2/decode.sh --nj 30 --cmd "$decode_cmd" --transform-dir exp/tri3b/decode \
--config conf/decode.config exp/tri3b/graph_ug data/test exp/dnn4b_nnet2_dbn_retrain/decode_ug &
# Here are the results (note that we never tuned this at all; it was our first guess
# at what might be reasonable parameters).
#for x in exp/dnn4b_nnet2_dbn_retrain/decode*; do grep WER $x/wer_* | utils/best_wer.sh ; done
#%WER 1.68 [ 210 / 12533, 36 ins, 43 del, 131 sub ] exp/dnn4b_nnet2_dbn_retrain/decode/wer_3
#%WER 7.86 [ 985 / 12533, 72 ins, 172 del, 741 sub ] exp/dnn4b_nnet2_dbn_retrain/decode_ug/wer_8
# Here is the baseline... we're slightly worse than the baseline on both test scenarios.
#for x in exp/dnn4b_nnet2/decode*; do grep WER $x/wer_* | utils/best_wer.sh ; done
#%WER 1.58 [ 198 / 12533, 22 ins, 45 del, 131 sub ] exp/dnn4b_nnet2/decode/wer_3
#%WER 7.80 [ 977 / 12533, 83 ins, 151 del, 743 sub ] exp/dnn4b_nnet2/decode_ug/wer_6
)
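# Once the two background decodes above finish, their results can be collected with the
# same best_wer.sh loop used elsewhere in this file (sketch):
# for x in exp/dnn4b_nnet2_dbn_retrain/decode*; do grep WER $x/wer_* | utils/best_wer.sh; done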
......@@ -89,6 +89,21 @@ exit 0
%WER 5.74 [ 324 / 5643, 46 ins, 41 del, 237 sub ] exp/tri3b/decode_bd_tgpr_eval92_fg/wer_19
%WER 5.90 [ 333 / 5643, 46 ins, 39 del, 248 sub ] exp/tri3b/decode_bd_tgpr_eval92_tg/wer_18
# this section demonstrates RNNLM-HS rescoring (commented out by default)
# the exact results may differ slightly because the Hogwild-style (lock-free, multithreaded) training in RNNLM-HS is nondeterministic
%WER 5.92 [ 334 / 5643, 58 ins, 32 del, 244 sub ] exp/tri3b/decode_bd_tgpr_eval92_fg/wer_14 # baseline (no rescoring)
%WER 5.42 [ 306 / 5643, 50 ins, 34 del, 222 sub ] exp/tri3b/decode_bd_tgpr_eval92_fg_rnnlm-hs100_0.3/wer_16
%WER 5.49 [ 310 / 5643, 47 ins, 36 del, 227 sub ] exp/tri3b/decode_bd_tgpr_eval92_fg_rnnlm-hs300_0.3/wer_18
%WER 5.90 [ 333 / 5643, 54 ins, 37 del, 242 sub ] exp/tri3b/decode_bd_tgpr_eval92_fg_rnnlm-hs30_0.15/wer_15
%WER 5.49 [ 310 / 5643, 45 ins, 38 del, 227 sub ] exp/tri3b/decode_bd_tgpr_eval92_fg_rnnlm-hs400_0.15/wer_18
%WER 5.49 [ 310 / 5643, 45 ins, 38 del, 227 sub ] exp/tri3b/decode_bd_tgpr_eval92_fg_rnnlm-hs400_0.15_N1000/wer_18
%WER 5.33 [ 301 / 5643, 41 ins, 41 del, 219 sub ] exp/tri3b/decode_bd_tgpr_eval92_fg_rnnlm-hs400_0.3/wer_20
%WER 5.40 [ 305 / 5643, 41 ins, 41 del, 223 sub ] exp/tri3b/decode_bd_tgpr_eval92_fg_rnnlm-hs400_0.3_N10/wer_20
%WER 5.33 [ 301 / 5643, 41 ins, 41 del, 219 sub ] exp/tri3b/decode_bd_tgpr_eval92_fg_rnnlm-hs400_0.3_N1000/wer_20
%WER 5.26 [ 297 / 5643, 44 ins, 36 del, 217 sub ] exp/tri3b/decode_bd_tgpr_eval92_fg_rnnlm-hs400_0.4/wer_18
%WER 5.25 [ 296 / 5643, 44 ins, 36 del, 216 sub ] exp/tri3b/decode_bd_tgpr_eval92_fg_rnnlm-hs400_0.4_N1000/wer_18
%WER 5.26 [ 297 / 5643, 42 ins, 39 del, 216 sub ] exp/tri3b/decode_bd_tgpr_eval92_fg_rnnlm-hs400_0.5_N1000/wer_20
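# A sketch of how the RNNLM-HS rescoring lines above can be regenerated (the directory
# glob is inferred from the names above):
# for x in exp/tri3b/decode_bd_tgpr_eval92_fg_rnnlm-hs*; do grep WER $x/wer_* | utils/best_wer.sh; done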
%WER 14.17 [ 1167 / 8234, 222 ins, 123 del, 822 sub ] exp/tri3b/decode_tgpr_dev93/wer_17
%WER 19.37 [ 1595 / 8234, 315 ins, 153 del, 1127 sub ] exp/tri3b/decode_tgpr_dev93.si/wer_15
......
......@@ -16,8 +16,11 @@ cmd=run.pl
nwords=10000 # This is how many words we're putting in the vocab of the RNNLM.
hidden=30
class=200 # Num-classes... should be somewhat larger than sqrt of nwords.
direct=1000 # Probably number of megabytes to allocate for hash-table for "direct" connections.
direct=1000 # Number of weights that are used for "direct" connections, in millions.
rnnlm_ver=rnnlm-0.3e # version of RNNLM to use
threads=1 # for RNNLM-HS
bptt=2 # length of BPTT unfolding in RNNLM
bptt_block=20 # length of BPTT unfolding in RNNLM
# End configuration section.
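# For example (sketch; this mirrors how the script is called from the WSJ run.sh later in
# this commit, and the output directory name is just an example):
#   local/wsj_train_rnnlms.sh --rnnlm_ver rnnlm-hs-0.1b --threads 8 --bptt 4 --bptt-block 10 \
#     --hidden 30 --nwords 10000 --direct 0 data/local/rnnlm-hs.h30.voc10k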
[ -f ./path.sh ] && . ./path.sh
......@@ -42,20 +45,24 @@ export PATH=$KALDI_ROOT/tools/$rnnlm_ver:$PATH
# needed for me as I ran on a machine that had been set up
# as 64-bit by default.
cd $KALDI_ROOT/tools || exit 1;
if [ -d $rnnlm_ver ]; then
if [ -f $rnnlm_ver/rnnlm ]; then
echo Not installing the rnnlm toolkit since it is already there.
else
echo Downloading and installing the rnnlm tools
# http://www.fit.vutbr.cz/~imikolov/rnnlm/$rnnlm_ver.tgz
if [ ! -f $rnnlm_ver.tgz ]; then
wget http://www.fit.vutbr.cz/~imikolov/rnnlm/$rnnlm_ver.tgz || exit 1;
if [ $rnnlm_ver == "rnnlm-hs-0.1b" ]; then
extras/install_rnnlm_hs.sh
else
echo Downloading and installing the rnnlm tools
# http://www.fit.vutbr.cz/~imikolov/rnnlm/$rnnlm_ver.tgz
if [ ! -f $rnnlm_ver.tgz ]; then
wget http://www.fit.vutbr.cz/~imikolov/rnnlm/$rnnlm_ver.tgz || exit 1;
fi
mkdir $rnnlm_ver
cd $rnnlm_ver
tar -xvzf ../$rnnlm_ver.tgz || exit 1;
make CC=g++ || exit 1;
echo Done making the rnnlm tools
fi
fi
mkdir $rnnlm_ver
cd $rnnlm_ver
tar -xvzf ../$rnnlm_ver.tgz || exit 1;
make CC=g++ || exit 1;
echo Done making the rnnlm tools
fi
) || exit 1;
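# Sketch (assumption, not part of the original script): after the install block above, one
# could double-check that the selected binary really exists, mirroring the test used at the
# top of that block:
# [ -f $KALDI_ROOT/tools/$rnnlm_ver/rnnlm ] || { echo "$0: no rnnlm binary in tools/$rnnlm_ver"; exit 1; }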
......@@ -128,15 +135,15 @@ echo "Training RNNLM (note: this uses a lot of memory! Run it on a big machine.)
# -direct-order 4 -direct 1000 -binary >& $dir/rnnlm1.log &
$cmd $dir/rnnlm.log \
$KALDI_ROOT/tools/$rnnlm_ver/rnnlm -independent -train $dir/train -valid $dir/valid \
-rnnlm $dir/rnnlm -hidden $hidden -rand-seed 1 -debug 2 -class $class -bptt 2 -bptt-block 20 \
$KALDI_ROOT/tools/$rnnlm_ver/rnnlm -threads $threads -independent -train $dir/train -valid $dir/valid \
-rnnlm $dir/rnnlm -hidden $hidden -rand-seed 1 -debug 2 -class $class -bptt $bptt -bptt-block $bptt_block \
-direct-order 4 -direct $direct -binary || exit 1;
# Make it look like Kaldi table format, with fake utterance-ids.
cat $dir/valid.in | awk '{ printf("uttid-%d ", NR); print; }' > $dir/valid.with_ids
utils/rnnlm_compute_scores.sh $dir $dir/tmp.valid $dir/valid.with_ids \
utils/rnnlm_compute_scores.sh --rnnlm_ver $rnnlm_ver $dir $dir/tmp.valid $dir/valid.with_ids \
$dir/valid.scores
nw=`wc -w < $dir/valid.with_ids` # Note: valid.with_ids includes utterance-ids, which
# add one word per sentence, to account for the </s> at the end of each sentence; this is the
......
......@@ -66,6 +66,18 @@ local/wsj_format_data.sh || exit 1;
# local/wsj_train_rnnlms.sh --cmd "$decode_cmd -l mem_free=16G" \
# --hidden 300 --nwords 40000 --class 400 --direct 2000 data/local/rnnlm.h300.voc40k &
# )
# To train RNNLM-HS, comment out the "false && \" line below.
false && \
(
num_threads_rnnlm=8
local/wsj_train_rnnlms.sh --rnnlm_ver rnnlm-hs-0.1b --threads $num_threads_rnnlm \
--cmd "$decode_cmd -l mem_free=1G" --bptt 4 --bptt-block 10 --hidden 30 --nwords 10000 --direct 0 data/local/rnnlm-hs.h30.voc10k
local/wsj_train_rnnlms.sh --rnnlm_ver rnnlm-hs-0.1b --threads $num_threads_rnnlm \
--cmd "$decode_cmd -l mem_free=1G" --bptt 4 --bptt-block 10 --hidden 100 --nwords 20000 --direct 0 data/local/rnnlm-hs.h100.voc20k
local/wsj_train_rnnlms.sh --rnnlm_ver rnnlm-hs-0.1b --threads $num_threads_rnnlm \
--cmd "$decode_cmd -l mem_free=1G" --bptt 4 --bptt-block 10 --hidden 300 --nwords 30000 --direct 0 data/local/rnnlm-hs.h300.voc30k
local/wsj_train_rnnlms.sh --rnnlm_ver rnnlm-hs-0.1b --threads $num_threads_rnnlm \
--cmd "$decode_cmd -l mem_free=1G" --bptt 4 --bptt-block 10 --hidden 400 --nwords 40000 --direct 0 data/local/rnnlm-hs.h400.voc40k
)
) &
......@@ -246,6 +258,10 @@ steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_bd_tgpr data/lang_test_bd_
# that build the RNNLMs, so it would fail.
# local/run_rnnlms_tri3b.sh
# The command below is commented out as we commented out the steps above
# that build the RNNLMs (HS version), so it would fail.
# wait; local/run_rnnlm-hs_tri3b.sh
# The following two steps, which are a kind of side-branch, try mixing up
( # from the 3b system. This is to demonstrate that script.
steps/mixup.sh --cmd "$train_cmd" \
......
......@@ -28,19 +28,35 @@ dir=$2
mkdir -p $dir/log || exit 1;
for f in $src/final.mdl $src/final.nnet $src/final.feature_transform $src/ali_train_pdf.counts; do
for f in $src/final.mdl $src/final.feature_transform $src/ali_train_pdf.counts; do
[ ! -f $f ] && echo "$0: expected file $f to exist" && exit 1
done
# We could do the following things all as one long piped command,
# but it will be easier to debug if we make them separate.
$cmd $dir/log/convert_feature_transform.log \
nnet1-to-raw-nnet $src/final.feature_transform $dir/0.raw || exit 1;
$cmd $dir/log/convert_model.log \
nnet1-to-raw-nnet $src/final.nnet $dir/1.raw || exit 1;
if [ -f $src/final.nnet ]; then
echo "$0: $src/final.nnet exists, using it as input."
$cmd $dir/log/convert_model.log \
nnet1-to-raw-nnet $src/final.nnet $dir/1.raw || exit 1;
elif [ -f $src/final.dbn ]; then
echo "$0: $src/final.dbn exists, using it as input."
num_leaves=$(am-info $src/final.mdl | grep -w pdfs | awk '{print $NF}') || exit 1;
dbn_output_dim=$(nnet-info $src/final.dbn | grep component | tail -n 1 | sed s:,::g | awk '{print $NF}') || exit 1;
[ -z "$dbn_output_dim" ] && exit 1;
cat > $dir/final_layer.conf <<EOF
AffineComponent input-dim=$dbn_output_dim output-dim=$num_leaves learning-rate=0.001
SoftmaxComponent dim=$num_leaves
EOF
$cmd $dir/log/convert_model.log \
nnet1-to-raw-nnet $src/final.dbn - \| \
raw-nnet-concat - "raw-nnet-init $dir/final_layer.conf -|" $dir/1.raw || exit 1;
else
echo "$0: expected either $src/final.nnet or $src/final.dbn to exist"
fi
$cmd $dir/log/append_model.log \
raw-nnet-concat $dir/0.raw $dir/1.raw $dir/concat.raw || exit 1;
......
......@@ -37,17 +37,19 @@ if [ $# != 4 ]; then
echo ""
echo "Main options (for others, see top of script file)"
echo " --config <config-file> # config file containing options"
echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
echo " --num-jobs-nnet <num-jobs|16> # Number of parallel jobs to use for main neural net"
echo " --cmd (utils/run.pl;utils/queue.pl <queue opts>) # how to run jobs."
echo " --num-jobs-nnet <num-jobs;16> # Number of parallel jobs to use for main neural net"
echo " # training (will affect results as well as speed; try 8, 16)"
echo " # Note: if you increase this, you may want to also increase"
echo " # the learning rate."
echo " --samples-per-iter <#samples|400000> # Number of samples of data to process per iteration, per"
echo " --samples-per-iter <#samples;400000> # Number of samples of data to process per iteration, per"
echo " # process."
echo " --splice-width <width|4> # Number of frames on each side to append for feature input"
echo " --feat-type <lda|raw> # (by default it tries to guess). The feature type you want"
echo " # to use as input to the neural net."
echo " --splice-width <width;4> # Number of frames on each side to append for feature input"
echo " # (note: we splice processed, typically 40-dimensional frames"
echo " --num-frames-diagnostic <#frames|4000> # Number of frames used in computing (train,valid) diagnostics"
echo " --num-valid-frames-combine <#frames|10000> # Number of frames used in getting combination weights at the"
echo " --num-frames-diagnostic <#frames;4000> # Number of frames used in computing (train,valid) diagnostics"
echo " --num-valid-frames-combine <#frames;10000> # Number of frames used in getting combination weights at the"
echo " # very end."
echo " --stage <stage|0> # Used to run a partially-completed training process from somewhere in"
echo " # the middle."
......
......@@ -377,8 +377,7 @@ echo Done
if $cleanup; then
echo Cleaning up data
if [ $egs_dir == "$dir/egs" ]; then
echo Removing training examples
rm $dir/egs/egs*
steps/nnet2/remove_egs.sh $dir/egs
fi
echo Removing most of the models
for x in `seq 0 $num_iters`; do
......
......@@ -121,11 +121,8 @@ if [ $# != 4 ]; then
echo " --splice-width <width|4> # Number of frames on each side to append for feature input"
echo " # (note: we splice processed, typically 40-dimensional frames"
echo " --lda-dim <dim|250> # Dimension to reduce spliced features to with LDA"
echo " --num-iters-final <#iters|10> # Number of final iterations to give to nnet-combine-fast to "
echo " --num-iters-final <#iters|20> # Number of final iterations to give to nnet-combine-fast to "
echo " # interpolate parameters (the weights are learned with a validation set)"
echo " --num-utts-subset <#utts|300> # Number of utterances in subsets used for validation and diagnostics"
echo " # (the validation subset is held out from training)"
echo " --num-frames-diagnostic <#frames|4000> # Number of frames used in computing (train,valid) diagnostics"
echo " --first-component-power <power|1.0> # Power applied to output of first p-norm layer... setting this to"
echo " # 0.5 seems to help under some circumstances."
echo " --stage <stage|-9> # Used to run a partially-completed training process from somewhere in"
......@@ -425,8 +422,7 @@ echo Done
if $cleanup; then
echo Cleaning up data
if [ $egs_dir == "$dir/egs" ]; then
echo Removing training examples
rm $dir/egs/egs*
steps/nnet2/remove_egs.sh $dir/egs
fi
echo Removing most of the models
for x in `seq 0 $num_iters`; do
......
......@@ -176,12 +176,10 @@ if [ $stage -le -3 ] && [ -z "$egs_dir" ]; then
$data $lang $alidir $dir || exit 1;
fi
echo $egs_dir
if [ -z $egs_dir ]; then
egs_dir=$dir/egs
fi
echo $egs_dir
iters_per_epoch=`cat $egs_dir/iters_per_epoch` || exit 1;
! [ $num_jobs_nnet -eq `cat $egs_dir/num_jobs_nnet` ] && \
echo "$0: Warning: using --num-jobs-nnet=`cat $egs_dir/num_jobs_nnet` from $egs_dir"
......@@ -397,8 +395,7 @@ echo Done
if $cleanup; then
echo Cleaning up data
if [ $egs_dir == "$dir/egs" ]; then
echo Removing training examples
rm $dir/egs/egs*
steps/nnet2/remove_egs.sh $dir/egs
fi
echo Removing most of the models
for x in `seq 0 $num_iters`; do
......
......@@ -132,11 +132,8 @@ if [ $# != 4 ]; then
echo " --splice-width <width|4> # Number of frames on each side to append for feature input"
echo " # (note: we splice processed, typically 40-dimensional frames"
echo " --lda-dim <dim|250> # Dimension to reduce spliced features to with LDA"
echo " --num-iters-final <#iters|10> # Number of final iterations to give to nnet-combine-fast to "
echo " --num-iters-final <#iters|20> # Number of final iterations to give to nnet-combine-fast to "
echo " # interpolate parameters (the weights are learned with a validation set)"
echo " --num-utts-subset <#utts|300> # Number of utterances in subsets used for validation and diagnostics"
echo " # (the validation subset is held out from training)"
echo " --num-frames-diagnostic <#frames|4000> # Number of frames used in computing (train,valid) diagnostics"
echo " --first-component-power <power|1.0> # Power applied to output of first p-norm layer... setting this to"
echo " # 0.5 seems to help under some circumstances."
echo " --stage <stage|-9> # Used to run a partially-completed training process from somewhere in"
......@@ -478,7 +475,6 @@ echo Done
if $cleanup; then
echo Cleaning up data
if [ $egs_dir == "$dir/egs" ]; then
echo Removing training examples
rm $dir/egs/egs*
steps/nnet2/remove_egs.sh $dir/egs
fi
fi
......@@ -120,13 +120,8 @@ if [ $# != 4 ]; then
echo " --splice-width <width|4> # Number of frames on each side to append for feature input"
echo " # (note: we splice processed, typically 40-dimensional frames"
echo " --lda-dim <dim|250> # Dimension to reduce spliced features to with LDA"
echo " --num-iters-final <#iters|10> # Number of final iterations to give to nnet-combine-fast to "
echo " --num-iters-final <#iters|20> # Number of final iterations to give to nnet-combine-fast to "
echo " # interpolate parameters (the weights are learned with a validation set)"
echo " --num-utts-subset <#utts|300> # Number of utterances in subsets used for validation and diagnostics"
echo " # (the validation subset is held out from training)"
echo " --num-frames-diagnostic <#frames|4000> # Number of frames used in computing (train,valid) diagnostics"
echo " --num-valid-frames-combine <#frames|10000> # Number of frames used in getting combination weights at the"
echo " # very end."
echo " --stage <stage|-9> # Used to run a partially-completed training process from somewhere in"
echo " # the middle."
......@@ -423,8 +418,7 @@ echo Done
if $cleanup; then
echo Cleaning up data
if [ $egs_dir == "$dir/egs" ]; then
echo Removing training examples
rm $dir/egs/egs*
steps/nnet2/remove_egs.sh $dir/egs
fi
echo Removing most of the models
for x in `seq 0 $num_iters`; do
......
......@@ -390,8 +390,7 @@ echo Done
if $cleanup; then
echo Cleaning up data
if [ $egs_dir == "$dir/egs" ]; then
echo Removing training examples
rm $dir/egs/egs*
steps/nnet2/remove_egs.sh $dir/egs
fi
echo Removing most of the models
for x in `seq 0 $num_iters`; do
......
......@@ -134,13 +134,8 @@ if [ $# != 4 ]; then
echo " --splice-width <width|4> # Number of frames on each side to append for feature input"
echo " # (note: we splice processed, typically 40-dimensional frames"
echo " --lda-dim <dim|250> # Dimension to reduce spliced features to with LDA"
echo " --num-iters-final <#iters|10> # Number of final iterations to give to nnet-combine-fast to "
echo " --num-iters-final <#iters|20> # Number of final iterations to give to nnet-combine-fast to "
echo " # interpolate parameters (the weights are learned with a validation set)"
echo " --num-utts-subset <#utts|300> # Number of utterances in subsets used for validation and diagnostics"
echo " # (the validation subset is held out from training)"
echo " --num-frames-diagnostic <#frames|4000> # Number of frames used in computing (train,valid) diagnostics"
echo " --num-valid-frames-combine <#frames|10000> # Number of frames used in getting combination weights at the"
echo " # very end."
echo " --stage <stage|-9> # Used to run a partially-completed training process from somewhere in"
echo " # the middle."
......@@ -455,8 +450,7 @@ echo Done
if $cleanup; then
echo Cleaning up data
if [ $egs_dir == "$dir/egs" ]; then
echo Removing training examples
rm $dir/egs/egs*
steps/nnet2/remove_egs.sh $dir/egs
fi
echo Removing most of the models
for x in `seq 0 $num_iters`; do
......
......@@ -66,6 +66,8 @@ if [ -f path.sh ]; then . ./path.sh; fi
if [ $# != 5 ]; then
echo "Usage: $0 [opts] <data> <lang> <ali-dir> <model-dir> <exp-dir>"
echo " e.g.: $0 data/train data/lang exp/tri3_ali exp/tri4_nnet exp/tri4b_nnet"
echo "See also the more recent script train_more.sh which requires the egs"
echo "directory."
echo ""
echo "Main options (for others, see top of script file)"
echo " --config <config-file> # config file containing options"
......@@ -140,12 +142,10 @@ if [ $stage -le -3 ] && [ -z "$egs_dir" ]; then
$data $lang $alidir $dir || exit 1;
fi
echo $egs_dir
if [ -z $egs_dir ]; then
egs_dir=$dir/egs
fi
echo $egs_dir
iters_per_epoch=`cat $egs_dir/iters_per_epoch` || exit 1;
! [ $num_jobs_nnet -eq `cat $egs_dir/num_jobs_nnet` ] && \
echo "$0: Warning: using --num-jobs-nnet=`cat $egs_dir/num_jobs_nnet` from $egs_dir"
......@@ -263,7 +263,7 @@ if $cleanup; then
echo Cleaning up data
if [ $egs_dir == "$dir/egs" ]; then
echo Removing training examples
rm $dir/egs/egs*
steps/nnet2/remove_egs.sh $dir/egs
fi
echo Removing most of the models
for x in `seq 0 $num_iters`; do
......
......@@ -13,8 +13,10 @@ use_phi=false # This is kind of an obscure option. If true, we'll remove the o
# difference (if any) to WER; it's more so that we know we are doing the right thing.
test=false # Activate a testing option.
stage=1 # Stage of this script, for partial reruns.
rnnlm_ver=rnnlm-0.3e
# End configuration section.
echo "$0 $@" # Print the command line for logging
[ -f ./path.sh ] && . ./path.sh
......@@ -151,7 +153,7 @@ fi
if [ $stage -le 6 ]; then
echo "$0: invoking rnnlm_compute_scores.sh which calls rnnlm, to get RNN LM scores."
$cmd JOB=1:$nj $dir/log/rnnlm_compute_scores.JOB.log \
utils/rnnlm_compute_scores.sh $rnndir $adir.JOB/temp $adir.JOB/words_text $adir.JOB/lmwt.rnn \
utils/rnnlm_compute_scores.sh --rnnlm_ver $rnnlm_ver $rnndir $adir.JOB/temp $adir.JOB/words_text $adir.JOB/lmwt.rnn \
|| exit 1;
fi
if [ $stage -le 7 ]; then
......
......@@ -17,9 +17,12 @@
# words; unk.probs gives the probs for words given this class, and it
# has, on each line, "word prob".
rnnlm_ver=rnnlm-0.3e
. ./path.sh || exit 1;
. utils/parse_options.sh
rnnlm=$KALDI_ROOT/tools/rnnlm-0.3e/rnnlm
rnnlm=$KALDI_ROOT/tools/$rnnlm_ver/rnnlm
[ ! -f $rnnlm ] && echo No such program $rnnlm && exit 1;
......
......@@ -31,48 +31,6 @@ using namespace kaldi;
namespace kaldi {
/*
* ASSERTS
*/
template<typename Real>
static void AssertEqual(const MatrixBase<Real> &A,
const MatrixBase<Real> &B,
float tol = 0.001) {
KALDI_ASSERT(A.NumRows() == B.NumRows()&&A.NumCols() == B.NumCols());
for (MatrixIndexT i = 0;i < A.NumRows();i++) {
for (MatrixIndexT j = 0;j < A.NumCols();j++) {
KALDI_ASSERT(std::abs(A(i, j)-B(i, j)) <= tol*std::max(1.0, (double) (std::abs(A(i, j))+std::abs(B(i, j)))));
}
}
}
template<typename Real>
static void AssertEqual(const CuMatrixBase<Real> &A,
const CuMatrixBase<Real> &B,
float tol = 0.001) {
Real Anorm = A.FrobeniusNorm(), Bnorm = B.FrobeniusNorm();
CuMatrix<Real> diff(A);
diff.AddMat(-1.0, B);
Real diff_norm = diff.FrobeniusNorm();
if (diff_norm > tol * 0.5 * (Anorm + Bnorm)) {
KALDI_LOG << "A = " << A;
KALDI_LOG << "B = " << B;
KALDI_ERR << "Matrices differ, " << diff_norm << " > " << tol << " * 0.5 * ( "
<< Anorm << " + " << Bnorm << " ). ";
}
}
template<typename Real>
static void AssertEqual(const CuBlockMatrix<Real> &A,
const CuBlockMatrix<Real> &B,
float tol = 0.001) {
CuMatrix<Real> Acopy(A), Bcopy(B);
AssertEqual(Acopy, Bcopy, tol);
}
template<typename Real>
static bool ApproxEqual(const CuBlockMatrix<Real> &A,
const CuBlockMatrix<Real> &B,
......@@ -84,7 +42,6 @@ static bool ApproxEqual(const CuBlockMatrix<Real> &A,
template<class Real>
static void UnitTestCuBlockMatrixIO() {
for (int32 i = 0; i < 10; i++) {
......
......@@ -35,6 +35,7 @@
#include "cudamatrix/cu-common.h"
#include "cudamatrix/cu-device.h"
#include "cudamatrix/cu-matrix.h"
#include "base/kaldi-error.h"
#include "util/common-utils.h"
......@@ -408,6 +409,27 @@ void CuDevice::DeviceGetName(char* name, int32 len, int32 dev) {
}
void CuDevice::CheckGpuHealth() {
if (!Enabled()) return;
Timer t;
// prepare small matrices for a quick test
Matrix<BaseFloat> a(50, 100);
Matrix<BaseFloat> b(100, 50);
a.SetRandn();
b.SetRandUniform();
// multiply the two small matrices on the CPU:
Matrix<BaseFloat> c(50, 50);
c.AddMatMat(1.0, a, kNoTrans, b, kNoTrans, 0.0);
// multiply the same matrices on the GPU:
CuMatrix<BaseFloat> c1(50, 50);
c1.AddMatMat(1.0, CuMatrix<BaseFloat>(a), kNoTrans, CuMatrix<BaseFloat>(b), kNoTrans, 0.0);
// check that the relative difference is < 1%
AssertEqual(c, Matrix<BaseFloat>(c1), 0.01);
// measure time spent in this check
AccuProfile(__func__, t.Elapsed());
}
struct CuAllocatorOptions {
bool cache_memory; // Enable GPU memory caching, (false = disable).
int32 count; // Number of times we free and delete a particular size before we
......
......@@ -95,6 +95,10 @@ class CuDevice {
std::string GetFreeMemory(int64* free = NULL, int64* total = NULL) const;
/// Get the name of the GPU