Commit f521d7f2 authored by Korbinian Riedhammer
Browse files

added an option to deliberately ignore fmllr matrices although present

git-svn-id: https://svn.code.sf.net/p/kaldi/code/trunk@1632 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
parent 93b59477
......@@ -7,7 +7,7 @@
# Begin configuration.
cmd=run.pl
config=
stage=-4
stage=-5
scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1"
realign_iters="10 20 30";
mllt_iters="2 4 6 12";
......@@ -23,6 +23,7 @@ randprune=4.0 # This is approximately the ratio by which we will speed up the
splice_opts=
cluster_thresh=-1 # for build-tree control final bottom-up clustering of leaves
normft2=false # typically the tandem features will already be normalized due to pca
extra_lda=true # apply an extra LDA after the feature paste (reduces the dimension)
# End configuration.
echo "$0 $@" # Print the command line for logging
......@@ -38,6 +39,7 @@ if [ $# != 7 ]; then
echo " --config <config-file> # config containing options"
echo " --stage <stage> # stage to do partial re-run from."
echo " --normft2 (true|false) # apply CMVN to second data set?"
echo " --extra-lda (true|false) # apply extra LDA after feature paste (lower dim!); default true"
exit 1;
fi
......@@ -76,8 +78,9 @@ sdata2=$data2/split$nj;
feats1="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata1/JOB/utt2spk scp:$sdata1/JOB/cmvn.scp scp:$sdata1/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- |"
# Now estimate LDA, which will only be applied to the spectral features
# (assuming that the tandem features were already discriminatively trained)
if [ $stage -le -4 ]; then
# (assuming that the tandem features were already discriminatively trained).
# This is instead of the deltas.
if [ $stage -le -5 ]; then
echo "Accumulating LDA statistics (this only applies to the base feature part)."
$cmd JOB=1:$nj $dir/log/lda_acc.JOB.log \
ali-to-post "ark:gunzip -c $alidir/ali.JOB.gz|" ark:- \| \
......@@ -114,6 +117,20 @@ echo $normft2 > $dir/normft2
# Begin training; initially, we have no MLLT matrix
cur_mllt_iter=0
# Optional second LDA pass, accumulated on $tandemfeats instead of the base
# features. It runs only when resuming at or before stage -4 AND --extra-lda
# is true. On success, $feats is redefined so that the newly estimated
# transform $dir/0.mat is applied on top of $tandemfeats.
#
# $extra_lda is compared explicitly against "true": inside `[ ... ]` a bare
# word such as "false" is a non-empty string and therefore tests as true.
#
# NOTE(review): the accumulation log name ($dir/log/lda_acc.JOB.log) is the
# same one used by the first LDA pass, so those logs get overwritten; confirm
# that this is acceptable.
# NOTE(review): $feats is only redefined inside the stage guard, so a partial
# re-run with --stage greater than -4 keeps whatever $feats was before;
# confirm against the rest of the script.
if [ "$stage" -le -4 ] && [ "$extra_lda" = true ]; then
  echo "Accumulating LDA statistics (for tandem features this time)."
  $cmd JOB=1:$nj $dir/log/lda_acc.JOB.log \
    ali-to-post "ark:gunzip -c $alidir/ali.JOB.gz|" ark:- \| \
    weight-silence-post 0.0 $silphonelist $alidir/final.mdl ark:- ark:- \| \
    acc-lda --rand-prune=$randprune $alidir/final.mdl "$tandemfeats" ark,s,cs:- \
    $dir/lda.JOB.acc || exit 1;
  est-lda --write-full-matrix=$dir/full.mat --dim=$dim $dir/0.mat $dir/lda.*.acc \
    2>$dir/log/lda_est.log || exit 1;
  # The per-job accumulators are no longer needed once the matrix is estimated.
  rm $dir/lda.*.acc
  feats="$tandemfeats transform-feats $dir/0.mat ark:- ark:- |"
fi
if [ $stage -le -3 ]; then
echo "Accumulating tree stats"
$cmd JOB=1:$nj $dir/log/acc_tree.JOB.log \
......@@ -189,8 +206,8 @@ while [ $x -lt $num_iters ]; do
gmm-transform-means $dir/$x.mat.new $dir/$x.mdl $dir/$x.mdl \
2> $dir/log/transform_means.$x.log || exit 1;
# see if this is the first MLLT iteration; otherwise compose transforms
if [ $cur_mllt_iter == 0 ]; then
# The new matrix stands alone only on the first MLLT iteration when no extra
# LDA is in use; in every other case it is composed with the current transform.
# $extra_lda is compared explicitly: inside `[ ... ]`, `! $extra_lda` is false
# for any non-empty value, "false" included.
if [ "$cur_mllt_iter" = 0 ] && [ "$extra_lda" != true ]; then
mv $dir/$x.mat.new $dir/$x.mat || exit 1;
else
compose-transforms --print-args=false $dir/$x.mat.new $dir/$cur_mllt_iter.mat $dir/$x.mat || exit 1;
......
......@@ -15,6 +15,7 @@ num_gselect1=50 # first stage of Gaussian-selection
num_gselect2=25 # second stage.
intermediate_num_gauss=2000
num_iters=3
no_fmllr=false # if true, ignore speaker transforms ($alidir/trans.*) even if present
# End configuration section.
echo "$0 $@" # Print the command line for logging
......@@ -30,7 +31,8 @@ if [ $# != 5 ]; then
echo " --config <config-file> # config containing options"
echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
echo " --silence-weight <sil-weight> # weight for silence (e.g. 0.5 or 0.0)"
echo " --num-iters <#iters> # Number of iterations of E-M"
echo " --num-iters <#iters> # Number of iterations of E-M"
echo " --no-fmllr (true|false) # ignore speaker matrices even if present"
exit 1;
fi
......@@ -72,9 +74,15 @@ case $feat_type in
;;
*) echo "$0: invalid feature type $feat_type" && exit 1;
esac
if [ -f $alidir/trans.1 ]; then
echo "$0: using transforms from $alidir"
feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark,s,cs:$alidir/trans.JOB ark:- ark:- |"
# Append the per-speaker transform stage to the feature pipeline unless the
# caller asked to ignore the transforms via $no_fmllr.
if ! $no_fmllr; then
  echo "$0: using transforms from $alidir"
  feats="${feats} transform-feats --utt2spk=ark:${sdata}/JOB/utt2spk ark,s,cs:${alidir}/trans.JOB ark:- ark:- |"
else
  echo "$0: deliberately ignoring speaker transforms from $alidir"
fi
fi
##
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment