Commit ab542966 authored by Dan Povey
Browse files

trunk: various bug-fixes relating to nnet2 training scripts.

git-svn-id: https://svn.code.sf.net/p/kaldi/code/trunk@4597 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
parent 66cba1f2
......@@ -68,7 +68,7 @@ if [ $stage -le 1 ]; then
# Take the first 30k utterances (about 1/8th of the data); this subset will be
# used for the diagubm training
utils/subset_data_dir.sh --first data/train_nodev 30000 data/train_hires_30k
utils/subset_data_dir.sh --first data/train_hires_nodev 30000 data/train_hires_30k
local/remove_dup_utts.sh 200 data/train_hires_30k data/train_hires_30k_nodup # 33hr
# create a 100k subset for the lda+mllt training
......
......@@ -12,6 +12,12 @@ About TIMIT:
time-aligned orthographic, phonetic and word transcriptions as well as
a 16-bit, 16kHz speech waveform file for each utterance."
Note: please do not use this TIMIT setup as a generic example of how to run
Kaldi, as TIMIT has a very nonstandard structure. Any of the other setups
would be better for this purpose: e.g. librispeech/s5 is quite nice, and is
free; yesno is very tiny and fast to run and is also free; and wsj/s5 has an
unusually complete set of example scripts which may however be confusing.
Each subdirectory of this directory contains the scripts for a sequence
of experiments.
......
......@@ -68,8 +68,8 @@ lang=$2 # kept for historical reasons, but never used.
alidir=$3
dir=$4
[ -z "$left_context" ] && left_context=splice_width
[ -z "$right_context" ] && right_context=splice_width
[ -z "$left_context" ] && left_context=$splice_width
[ -z "$right_context" ] && right_context=$splice_width
# Check some files.
......
......@@ -57,8 +57,8 @@ lang=$2
alidir=$3
dir=$4
[ -z "$left_context" ] && left_context=splice_width
[ -z "$right_context" ] && right_context=splice_width
[ -z "$left_context" ] && left_context=$splice_width
[ -z "$right_context" ] && right_context=$splice_width
[ ! -z "$online_ivector_dir" ] && \
extra_files="$online_ivector_dir/ivector_online.scp $online_ivector_dir/ivector_period"
......
......@@ -261,17 +261,18 @@ if [ $stage -le -2 ]; then
--num-targets $num_leaves \
configs $dir || exit -1;
cur_num_hidden_layer=1 # counts the number of hidden layers in the network
# this is different from the number of components
# in the network; each hidden layer is composed of
# affine comp. + pnorm comp. + normalization comp.
# optionally a splice component is also added
$cmd $dir/log/nnet_init.log \
nnet-am-init $alidir/tree $lang/topo "nnet-init $dir/nnet.config -|" \
$dir/0.mdl || exit 1;
fi
cur_num_hidden_layer=1 # counts the number of hidden layers in the network
# this is different from the number of components
# in the network; each hidden layer is composed of
# affine comp. + pnorm comp. + normalization comp.
# optionally a splice component is also added
if [ $stage -le -1 ]; then
echo "Training transition probabilities and setting priors"
$cmd $dir/log/train_trans.log \
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment