Commit d0ef62c7 authored by Dan Povey
Browse files

trunk: merging from sandbox/online a couple files that were missed due to...

trunk: merging from sandbox/online a couple files that were missed due to conflict (thanks Vassil for noticing)

git-svn-id: https://svn.code.sf.net/p/kaldi/code/trunk@4364 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
parent 732d2833
......@@ -39,7 +39,7 @@ samples_per_iter=200000 # each iteration of training, see this many samples
num_jobs_nnet=16 # Number of neural net jobs to run in parallel. This option
# is passed to get_egs.sh.
get_egs_stage=0
spk_vecs_dir=
online_ivector_dir=
shuffle_buffer_size=5000 # This "buffer_size" variable controls randomization of the samples
# on each iter. You could set it to 0 or to a large value for complete
......@@ -81,6 +81,9 @@ lda_opts=
lda_dim=
egs_opts=
transform_dir= # If supplied, overrides alidir
cmvn_opts= # will be passed to get_lda.sh and get_egs.sh, if supplied.
# only relevant for "raw" features, not lda.
feat_type= # Can be used to force "raw" features.
prior_subset_size=10000 # 10k samples per job, for computing priors. Should be
# more than enough.
# End configuration section.
......@@ -163,26 +166,33 @@ utils/split_data.sh $data $nj
mkdir -p $dir/log
echo $nj > $dir/num_jobs
splice_opts=`cat $alidir/splice_opts 2>/dev/null`
cp $alidir/splice_opts $dir 2>/dev/null
cp $alidir/tree $dir
extra_opts=()
[ ! -z "$cmvn_opts" ] && extra_opts+=(--cmvn-opts "$cmvn_opts")
[ ! -z "$feat_type" ] && extra_opts+=(--feat-type $feat_type)
[ ! -z "$online_ivector_dir" ] && extra_opts+=(--online-ivector-dir $online_ivector_dir)
[ -z "$transform_dir" ] && transform_dir=$alidir
extra_opts+=(--transform-dir $transform_dir)
extra_opts+=(--splice-width $splice_width)
if [ $stage -le -4 ]; then
echo "$0: calling get_lda.sh"
steps/nnet2/get_lda.sh $lda_opts --splice-width $splice_width --cmd "$cmd" --transform-dir $transform_dir $data $lang $alidir $dir || exit 1;
steps/nnet2/get_lda.sh $lda_opts "${extra_opts[@]}" --cmd "$cmd" $data $lang $alidir $dir || exit 1;
fi
# these files will have been written by get_lda.sh
feat_dim=`cat $dir/feat_dim` || exit 1;
lda_dim=`cat $dir/lda_dim` || exit 1;
feat_dim=$(cat $dir/feat_dim) || exit 1;
ivector_dim=$(cat $dir/ivector_dim) || exit 1;
lda_dim=$(cat $dir/lda_dim) || exit 1;
if [ $stage -le -3 ] && [ -z "$egs_dir" ]; then
echo "$0: calling get_egs.sh"
[ ! -z $spk_vecs_dir ] && spk_vecs_opt="--spk-vecs-dir $spk_vecs_dir";
steps/nnet2/get_egs.sh $spk_vecs_opt --samples-per-iter $samples_per_iter --num-jobs-nnet $num_jobs_nnet \
--splice-width $splice_width --stage $get_egs_stage --cmd "$cmd" $egs_opts --io-opts "$io_opts" --transform-dir $transform_dir \
[ ! -z $spk_vecs_dir ] && egs_opts="$egs_opts --spk-vecs-dir $spk_vecs_dir";
steps/nnet2/get_egs.sh $egs_opts "${extra_opts[@]}" \
--samples-per-iter $samples_per_iter \
--num-jobs-nnet $num_jobs_nnet --stage $get_egs_stage \
--cmd "$cmd" $egs_opts --io-opts "$io_opts" \
$data $lang $alidir $dir || exit 1;
fi
......@@ -203,29 +213,16 @@ fi
if [ $stage -le -2 ]; then
echo "$0: initializing neural net";
# Get spk-vec dim (in case we're using them).
if [ ! -z "$spk_vecs_dir" ]; then
spk_vec_dim=$[$(copy-vector --print-args=false "ark:cat $spk_vecs_dir/vecs.1|" ark,t:- | head -n 1 | wc -w) - 3];
! [ $spk_vec_dim -gt 0 ] && echo "Error getting spk-vec dim" && exit 1;
ext_lda_dim=$[$lda_dim + $spk_vec_dim]
extend-transform-dim --new-dimension=$ext_lda_dim $dir/lda.mat $dir/lda_ext.mat || exit 1;
lda_mat=$dir/lda_ext.mat
ext_feat_dim=$[$feat_dim + $spk_vec_dim]
else
spk_vec_dim=0
lda_mat=$dir/lda.mat
ext_lda_dim=$lda_dim
ext_feat_dim=$feat_dim
fi
lda_mat=$dir/lda.mat
tot_input_dim=$[$feat_dim+$ivector_dim]
online_preconditioning_opts="alpha=$alpha num-samples-history=$num_samples_history update-period=$update_period rank-in=$precondition_rank_in rank-out=$precondition_rank_out max-change-per-sample=$max_change_per_sample"
stddev=`perl -e "print 1.0/sqrt($pnorm_input_dim);"`
cat >$dir/nnet.config <<EOF
SpliceComponent input-dim=$ext_feat_dim left-context=$splice_width right-context=$splice_width const-component-dim=$spk_vec_dim
SpliceComponent input-dim=$tot_input_dim left-context=$splice_width right-context=$splice_width const-component-dim=$ivector_dim
FixedAffineComponent matrix=$lda_mat
AffineComponentPreconditionedOnline input-dim=$ext_lda_dim output-dim=$pnorm_input_dim $online_preconditioning_opts learning-rate=$initial_learning_rate param-stddev=$stddev bias-stddev=$bias_stddev
AffineComponentPreconditionedOnline input-dim=$lda_dim output-dim=$pnorm_input_dim $online_preconditioning_opts learning-rate=$initial_learning_rate param-stddev=$stddev bias-stddev=$bias_stddev
PnormComponent input-dim=$pnorm_input_dim output-dim=$pnorm_output_dim p=$p
EOF
if [ $first_component_power != 1.0 ]; then
......
......@@ -81,6 +81,9 @@ egs_dir=
lda_opts=
egs_opts=
transform_dir=
cmvn_opts= # will be passed to get_lda.sh and get_egs.sh, if supplied.
# only relevant for "raw" features, not lda.
feat_type= # Can be used to force "raw" features.
prior_subset_size=10000 # 10k samples per job, for computing priors. Should be
# more than enough.
# End configuration section.
......@@ -159,17 +162,19 @@ sdata=$data/split$nj
utils/split_data.sh $data $nj
mkdir -p $dir/log
splice_opts=`cat $alidir/splice_opts 2>/dev/null`
cp $alidir/splice_opts $dir 2>/dev/null
cp $alidir/cmvn_opts $dir 2>/dev/null
cp $alidir/tree $dir
extra_opts=()
[ ! -z "$cmvn_opts" ] && extra_opts+=(--cmvn-opts "$cmvn_opts")
[ ! -z "$feat_type" ] && extra_opts+=(--feat-type $feat_type)
[ ! -z "$online_ivector_dir" ] && extra_opts+=(--online-ivector-dir $online_ivector_dir)
[ -z "$transform_dir" ] && transform_dir=$alidir
extra_opts+=(--transform-dir $transform_dir)
extra_opts+=(--splice-width $splice_width)
if [ $stage -le -4 ]; then
echo "$0: calling get_lda.sh"
steps/nnet2/get_lda.sh $lda_opts --transform-dir $transform_dir --splice-width $splice_width --cmd "$cmd" $data $lang $alidir $dir || exit 1;
steps/nnet2/get_lda.sh $lda_opts "${extra_opts[@]}" --cmd "$cmd" $data $lang $alidir $dir || exit 1;
fi
# these files will have been written by get_lda.sh
......@@ -178,13 +183,13 @@ lda_dim=`cat $dir/lda_dim` || exit 1;
if [ $stage -le -3 ] && [ -z "$egs_dir" ]; then
echo "$0: calling get_egs.sh"
[ ! -z $spk_vecs_dir ] && spk_vecs_opt="--spk-vecs-dir $spk_vecs_dir";
steps/nnet2/get_egs.sh $spk_vecs_opt --transform-dir $transform_dir --samples-per-iter $samples_per_iter \
--num-jobs-nnet $num_jobs_nnet --splice-width $splice_width --stage $get_egs_stage \
[ ! -z $spk_vecs_dir ] && egs_opts="$egs_opts --spk-vecs-dir $spk_vecs_dir";
steps/nnet2/get_egs.sh $egs_opts "${extra_opts[@]}" \
--samples-per-iter $samples_per_iter \
--num-jobs-nnet $num_jobs_nnet --stage $get_egs_stage \
--cmd "$cmd" $egs_opts --io-opts "$io_opts" \
$data $lang $alidir $dir || exit 1;
fi
if [ -z $egs_dir ]; then
egs_dir=$dir/egs
fi
......
......@@ -366,11 +366,6 @@ MAX_INITIALIZER_LINES = 30
SHOW_USED_FILES = YES
# If the sources in your project are distributed over multiple directories
# then setting the SHOW_DIRECTORIES tag to YES will show the directory hierarchy
# in the documentation. The default is NO.
SHOW_DIRECTORIES = NO
# The FILE_VERSION_FILTER tag can be used to specify a program or script that
# doxygen should invoke to get the current version for each file (typically from the
......@@ -661,11 +656,6 @@ HTML_FOOTER =
HTML_STYLESHEET =
# If the HTML_ALIGN_MEMBERS tag is set to YES, the members of classes,
# files or namespaces will be aligned in HTML using tables. If set to
# NO a bullet list will be used.
HTML_ALIGN_MEMBERS = YES
# If the GENERATE_HTMLHELP tag is set to YES, additional index files
# will be generated that can be used as input for tools like the
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment