Commit a90799db authored by Dan Povey

sandbox/online: committing a bunch of changes to the online setup, including code and scripts for retraining models on top of models for other setups.

git-svn-id: https://svn.code.sf.net/p/kaldi/code/sandbox/online@4342 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
parent 0bb39061
......@@ -8,3 +8,6 @@ for x in exp/*/decode_dev; do grep WER $x/wer_* | utils/best_wer.sh; done
%WER 31.13 [ 12184 / 39141, 1939 ins, 2584 del, 7661 sub ] exp/tri5a_0.1/decode_dev/wer_12
%WER 23.66 [ 9259 / 39141, 1495 ins, 2432 del, 5332 sub ] exp/nnet6c4_gpu/decode_dev/wer_11
%WER 25.12 [ 9832 / 39141, 1423 ins, 2471 del, 5938 sub ] exp/nnet2_online/nnet_a_gpu_online/decode_dev_utt/wer_11
%WER 23.79 [ 9311 / 39141, 1499 ins, 2277 del, 5535 sub ] exp/nnet2_online/nnet_a_gpu_online/decode_dev/wer_11
# configuration file for apply-cmvn-online, used in the script ../local/run_online_decoding.sh
#!/bin/bash
. cmd.sh
stage=1
train_stage=-10
use_gpu=true
set -e
. cmd.sh
. ./path.sh
. ./utils/parse_options.sh
if $use_gpu; then
if ! cuda-compiled; then
cat <<EOF && exit 1
This script is intended to be used with GPUs, but you have not compiled Kaldi with CUDA.
If you want to use GPUs (and have them), go to src/, and configure and make on a machine
where "nvcc" is installed.  Otherwise, call this script with --use-gpu false.
EOF
fi
parallel_opts="-l gpu=1"
num_threads=1
minibatch_size=512
# the _a is in case I want to change the parameters.
dir=exp/nnet2_online/nnet_a_gpu
else
# Use 4 nnet jobs just like run_4d_gpu.sh so the results should be
# almost the same, but this may be a little bit slow.
num_threads=16
minibatch_size=128
parallel_opts="-pe smp $num_threads"
dir=exp/nnet2_online/nnet_a
fi
if [ $stage -le 1 ]; then
mkdir -p exp/nnet2_online
# To train a diagonal UBM we don't need very much data, so use the smallest subset.
steps/online/nnet2/train_diag_ubm.sh --cmd "$train_cmd" --nj 30 --num-frames 400000 \
data/train_30k 512 exp/tri5a exp/nnet2_online/diag_ubm
fi
if [ $stage -le 2 ]; then
# iVector extractors can in general be sensitive to the amount of data, but
# this one has a fairly small dim (it defaults to 100), so we don't need all of
# the data; we just use the 100k subset (about one sixteenth of the data).
steps/online/nnet2/train_ivector_extractor.sh --cmd "$train_cmd" --nj 10 \
data/train_100k exp/nnet2_online/diag_ubm exp/nnet2_online/extractor || exit 1;
fi
if [ $stage -le 3 ]; then
ivectordir=exp/nnet2_online/ivectors_train
if [ $USER == dpovey ]; then # this shows how you can split across multiple file-systems.
utils/create_split_dir.pl /export/b0{1,2,3,4}/dpovey/kaldi-online/egs/fisher_english/s5/$ivectordir $ivectordir/storage
fi
# We extract iVectors on all the train data, which will be what we
# train the system on. This version of the iVector-extraction script
# pairs the utterances into twos (by default, see --utts-per-spk-max option)
# and treats each of these pairs as one speaker.
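# (As a hypothetical example: utterances spkA-utt1 and spkA-utt2 from the same
# original speaker would be grouped into one two-utterance pseudo-speaker, so
# each iVector is estimated from at most two utterances of data, which gives
# more variety in the iVectors seen during training.)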
# Note that these are extracted 'online'.
steps/online/nnet2/extract_ivectors_online2.sh --cmd "$train_cmd" --nj 60 \
data/train exp/nnet2_online/extractor $ivectordir || exit 1;
fi
if [ $stage -le 4 ]; then
if [ $USER == dpovey ]; then # this shows how you can split across multiple file-systems.
utils/create_split_dir.pl /export/b0{1,2,3,4}/dpovey/kaldi-online/egs/fisher_english/s5/$dir/egs $dir/egs/storage
fi
# Because we have a lot of data here and we don't want the training to take
# too long, we reduce the number of epochs from the defaults (15 + 5) to (3 +
# 1). The option "--io-opts '-tc 12'" is to have more than the default number
# (5) of jobs dumping the egs to disk; this is OK since we're splitting our
# data across four filesystems for speed.
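# (Here "-tc 12" is a GridEngine option that caps the number of concurrently
# running array-job tasks at 12.)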
steps/nnet2/train_pnorm_fast.sh --stage $train_stage --cleanup false \
--num-epochs 3 --num-epochs-extra 1 \
--splice-width 7 --feat-type raw \
--online-ivector-dir exp/nnet2_online/ivectors_train \
--cmvn-opts "--norm-means=false --norm-vars=false" \
--num-threads "$num_threads" \
--minibatch-size "$minibatch_size" \
--parallel-opts "$parallel_opts" \
--io-opts "-tc 12" \
--num-jobs-nnet 6 \
--num-hidden-layers 4 \
--mix-up 12000 \
--initial-learning-rate 0.01 --final-learning-rate 0.001 \
--cmd "$decode_cmd" \
--pnorm-input-dim 3500 \
--pnorm-output-dim 350 \
data/train data/lang exp/tri5a $dir || exit 1;
fi
if [ $stage -le 5 ]; then
# dump iVectors for the testing data.
steps/online/nnet2/extract_ivectors_online.sh --cmd "$train_cmd" --nj 20 \
data/dev exp/nnet2_online/extractor exp/nnet2_online/ivectors_dev || exit 1;
fi
if [ $stage -le 6 ]; then
# this does offline decoding that should give about the same results as the
# real online decoding.
steps/nnet2/decode.sh --nj 30 --cmd "$decode_cmd" --config conf/decode.config \
--online-ivector-dir exp/nnet2_online/ivectors_dev \
exp/tri5a/graph data/dev $dir/decode_dev || exit 1;
fi
if [ $stage -le 7 ]; then
# If this setup used PLP features, we'd have to give the option --feature-type plp
# to the script below.
steps/online/nnet2/prepare_online_decoding.sh data/lang exp/nnet2_online/extractor \
"$dir" ${dir}_online || exit 1;
fi
if [ $stage -le 8 ]; then
# do the actual online decoding with iVectors, carrying info forward from
# previous utterances of the same speaker.
steps/online/nnet2/decode.sh --config conf/decode.config --cmd "$decode_cmd" --nj 30 \
exp/tri5a/graph data/dev ${dir}_online/decode_dev || exit 1;
fi
if [ $stage -le 9 ]; then
# this version of the decoding treats each utterance separately
# without carrying forward speaker information.
steps/online/nnet2/decode.sh --config conf/decode.config --cmd "$decode_cmd" --nj 30 \
--per-utt true \
exp/tri5a/graph data/dev ${dir}_online/decode_dev_utt || exit 1;
fi
exit 0;
#Baseline: GMM+SAT system.
#%WER 31.07 [ 12163 / 39141, 1869 ins, 2705 del, 7589 sub ] exp/tri5a/decode_dev/wer_13
# Baseline: p-norm system on top of fMLLR features.
#%WER 23.66 [ 9259 / 39141, 1495 ins, 2432 del, 5332 sub ] exp/nnet6c4_gpu/decode_dev/wer_11
# Our experiment, with per-utterance decoding:
#%WER 25.12 [ 9832 / 39141, 1423 ins, 2471 del, 5938 sub ] exp/nnet2_online/nnet_a_gpu_online/decode_dev_utt/wer_11
# Our experiment, carrying forward the adaptation state between
# utterances of each speaker.
#%WER 23.79 [ 9311 / 39141, 1499 ins, 2277 del, 5535 sub ] exp/nnet2_online/nnet_a_gpu_online/decode_dev/wer_11
export KALDI_ROOT=`pwd`/../../..
export PATH=$PWD/utils/:$KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/src/fstbin/:$KALDI_ROOT/src/gmmbin/:$KALDI_ROOT/src/featbin/:$KALDI_ROOT/src/lm/:$KALDI_ROOT/src/sgmmbin/:$KALDI_ROOT/src/sgmm2bin/:$KALDI_ROOT/src/fgmmbin/:$KALDI_ROOT/src/latbin/:$KALDI_ROOT/src/nnetbin:$KALDI_ROOT/src/nnet2bin/:$KALDI_ROOT/src/kwsbin:$PWD:$PATH
export PATH=$PWD/utils/:$KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/src/fstbin/:$KALDI_ROOT/src/gmmbin/:$KALDI_ROOT/src/featbin/:$KALDI_ROOT/src/lm/:$KALDI_ROOT/src/sgmmbin/:$KALDI_ROOT/src/sgmm2bin/:$KALDI_ROOT/src/fgmmbin/:$KALDI_ROOT/src/latbin/:$KALDI_ROOT/src/nnetbin:$KALDI_ROOT/src/nnet2bin:$KALDI_ROOT/src/online2bin/:$KALDI_ROOT/src/ivectorbin/:$PWD:$PATH
export LC_ALL=C
......@@ -171,3 +171,5 @@ steps/train_sat.sh --cmd "$train_cmd" \
# local/run_for_spkid.sh
# local/run_nnet2.sh
# local/online/run_nnet2.sh
......@@ -29,6 +29,9 @@ EOF
dir=exp/nnet2_online_wsj/nnet_gpu
trainfeats=exp/nnet2_online_wsj/wsj_activations_train_gpu
srcdir=../../wsj/s5/exp/nnet2_online/nnet_a_gpu_online
# the following things are needed while training the combined model.
srcdir_orig=../../wsj/s5/exp/nnet2_online/nnet_a_gpu
ivector_src=../../wsj/s5/exp/nnet2_online/extractor
else
# Use 4 nnet jobs just like run_4d_gpu.sh so the results should be
# almost the same, but this may be a little bit slow.
......@@ -38,6 +41,9 @@ else
dir=exp/nnet2_online_wsj/nnet
trainfeats=exp/nnet2_online_wsj/wsj_activations_train
srcdir=../../wsj/s5/exp/nnet2_online/nnet_a_online
# the following things are needed while training the combined model.
srcdir_orig=../../wsj/s5/exp/nnet2_online/nnet_a
ivector_src=../../wsj/s5/exp/nnet2_online/extractor
fi
......@@ -88,8 +94,77 @@ if [ $stage -le 4 ]; then
wait
fi
## From this point on we try something else: we try training all the layers of
## the model on this dataset. First we need to create a combined version of the
## model.
if [ $stage -le 5 ]; then
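# create_appended_model.sh (added in this commit; see the script further down)
# strips the final softmax-related components from the original model and
# prepends the remaining components to the newly trained last layer, giving a
# single model that we can backpropagate through.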
steps/nnet2/create_appended_model.sh $srcdir_orig $dir ${dir}_combined_init
# Set the learning rate in this initial model to our guess of a suitable value.
# note: we initially tried 0.005, and this gave us WERs of (1.40, 1.48, 7.24, 7.70) vs.
# (1.32, 1.38, 7.20, 7.44) with a learning rate of 0.01.
initial_learning_rate=0.01
nnet-am-copy --learning-rate=$initial_learning_rate ${dir}_combined_init/final.mdl ${dir}_combined_init/final.mdl
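# (nnet-am-copy reads the whole input model before writing its output, so
# using the same filename for input and output is OK here.)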
fi
# In order to train the combined model, we'll need to dump iVectors.
if [ $stage -le 6 ]; then
steps/online/nnet2/extract_ivectors_online2.sh --cmd "$train_cmd" --nj 10 \
data/train $ivector_src exp/nnet2_online_wsj/ivectors || exit 1;
fi
if [ $stage -le 7 ]; then
# assume left and right context of model are identical.
splice_width=$(nnet-am-info ${dir}_combined_init/final.mdl | grep '^left-context' | awk '{print $2}') || exit 1;
# Note: in general the get_egs.sh script would get things like the LDA matrix
# from exp/tri3b_ali, which would be the wrong thing to do as we want to get
# them from the original model dir. In this case we're using raw MFCC
# features so it's not an issue. But in general we'd probably have to create
# a temporary dir and copy or link both the alignments and feature-related
# things to it.
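# A hypothetical sketch of that workaround (not needed for this setup) might
# look something like:
#   tmpdir=exp/tri3b_ali_for_egs                   # hypothetical directory name
#   mkdir -p $tmpdir
#   cp exp/tri3b_ali/{ali.*.gz,tree,final.mdl,num_jobs} $tmpdir   # alignments etc.
#   cp $srcdir_orig/{final.mat,splice_opts,cmvn_opts} $tmpdir 2>/dev/null  # feature-related files, if present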
steps/nnet2/get_egs.sh --cmd "$train_cmd" \
--feat-type raw --cmvn-opts "--norm-means=false --norm-vars=false" \
--online-ivector-dir exp/nnet2_online_wsj/ivectors \
--num-jobs-nnet 4 --splice-width $splice_width \
data/train data/lang exp/tri3b_ali ${dir}_combined
fi
if [ $stage -le 8 ]; then
steps/nnet2/train_more.sh --learning-rate-factor 0.1 --cmd "$train_cmd" \
--num-threads "$num_threads" \
--minibatch-size "$minibatch_size" \
--parallel-opts "$parallel_opts" \
${dir}_combined_init/final.mdl ${dir}_combined/egs ${dir}_combined
fi
if [ $stage -le 9 ]; then
# Create an online-decoding dir corresponding to what we just trained above.
# If this setup used PLP features, we'd have to give the option --feature-type plp
# to the script below.
steps/online/nnet2/prepare_online_decoding.sh data/lang $ivector_src \
${dir}_combined ${dir}_combined_online || exit 1;
fi
# Here are the results:
if [ $stage -le 10 ]; then
# do the online decoding on top of the retrained _combined_online model, and
# also the per-utterance version of the online decoding.
steps/online/nnet2/decode.sh --config conf/decode.config --cmd "$decode_cmd" --nj 20 \
exp/tri3b/graph data/test ${dir}_combined_online/decode &
steps/online/nnet2/decode.sh --config conf/decode.config --cmd "$decode_cmd" --nj 20 \
exp/tri3b/graph_ug data/test ${dir}_combined_online/decode_ug &
steps/online/nnet2/decode.sh --config conf/decode.config --cmd "$decode_cmd" --nj 20 \
--per-utt true exp/tri3b/graph data/test ${dir}_combined_online/decode_per_utt &
steps/online/nnet2/decode.sh --config conf/decode.config --cmd "$decode_cmd" --nj 20 \
--per-utt true exp/tri3b/graph_ug data/test ${dir}_combined_online/decode_ug_per_utt || exit 1;
wait
fi
exit 0;
# Here are the results when we just retrain the last layer:
# grep WER exp/nnet2_online_wsj/nnet_gpu_online/decode/wer_* | utils/best_wer.sh
#%WER 1.61 [ 202 / 12533, 22 ins, 46 del, 134 sub ] exp/nnet2_online_wsj/nnet_gpu_online/decode/wer_3
#a11:s5: grep WER exp/nnet2_online_wsj/nnet_gpu_online/decode_ug/wer_* | utils/best_wer.sh
......@@ -99,6 +174,14 @@ fi
# %WER 1.72 [ 216 / 12533, 26 ins, 45 del, 145 sub ] exp/nnet2_online_wsj/nnet_gpu_online/decode_utt/wer_3
# %WER 8.40 [ 1053 / 12533, 85 ins, 158 del, 810 sub ] exp/nnet2_online_wsj/nnet_gpu_online/decode_ug_utt/wer_6
# and here are the results when we retrain the whole thing:
# %WER 1.32 [ 165 / 12533, 14 ins, 34 del, 117 sub ] exp/nnet2_online_wsj/nnet_gpu_combined_online/decode/wer_3
# %WER 7.20 [ 902 / 12533, 78 ins, 127 del, 697 sub ] exp/nnet2_online_wsj/nnet_gpu_combined_online/decode_ug/wer_6
# and with per-utterance decoding:
# %WER 1.38 [ 173 / 12533, 19 ins, 32 del, 122 sub ] exp/nnet2_online_wsj/nnet_gpu_combined_online/decode_per_utt/wer_3
# %WER 7.44 [ 932 / 12533, 57 ins, 163 del, 712 sub ] exp/nnet2_online_wsj/nnet_gpu_combined_online/decode_ug_per_utt/wer_8
# And this is a suitable baseline: a system trained on RM only.
#a11:s5: grep WER exp/nnet2_online/nnet_gpu_online/decode/wer_* | utils/best_wer.sh
#%WER 2.20 [ 276 / 12533, 25 ins, 69 del, 182 sub ] exp/nnet2_online/nnet_gpu_online/decode/wer_8
......
......@@ -130,8 +130,6 @@ if [ $stage -le 8 ]; then
done
fi
[ $stage -eq 8 ] && exit 1; ## This is temporary.
if [ $stage -le 9 ]; then
# this version of the decoding treats each utterance separately
# without carrying forward speaker information.
......@@ -173,8 +171,8 @@ for x in exp/nnet2_online/nnet_a_gpu/decode_eval2000_*; do grep Sum $x/score_*/*
# the experiment tested using truly-online decoding, tested separately per
# utterance (which should in principle give the same results as the batch-mode
# test, which also was per-utterance); I'm not sure what the reason for the slight improvement
# is.
# test, which also was per-utterance); I'm not sure what the reason for the
# slight improvement is.
%WER 21.43 [ 10594 / 49427, 1219 ins, 3005 del, 6370 sub ] exp/nnet2_online/nnet_a_gpu_online/decode_train_dev_sw1_fsh_tgpr_per_utt/wer_12
%WER 21.88 [ 10817 / 49427, 1247 ins, 2969 del, 6601 sub ] exp/nnet2_online/nnet_a_gpu_online/decode_train_dev_sw1_tg_per_utt/wer_12
%WER 17.8 | 1831 21395 | 84.0 10.6 5.4 1.8 17.8 56.0 | exp/nnet2_online/nnet_a_gpu_online/decode_eval2000_sw1_fsh_tgpr_per_utt/score_12/eval2000.ctm.swbd.filt.sys
......
#!/bin/bash
# Copyright 2014 Johns Hopkins University (Author: Daniel Povey)
# Apache 2.0.
# This script is for use with "retrain_fast.sh"; it combines the original model
# (the one you trained on top of) with the single-layer model you trained, so
# that you can do joint backpropagation.
# Begin configuration options.
cmd=run.pl
cleanup=true
# End configuration options.
echo "$0 $@" # Print the command line for logging
if [ -f path.sh ]; then . ./path.sh; fi
. parse_options.sh || exit 1;
if [ $# != 3 ]; then
echo "Usage: $0 <original-nnet-dir> <new-nnet-dir> <combined-nnet-dir>"
echo "where <original-nnet-dir> will typically be a normal neural net from another corpus,"
echo "and <new-nnet-dir> will usually be a single-layer neural net trained on top of it by"
echo "dumping the activations (e.g. using steps/online/nnet2/dump_nnet_activations.sh, I"
echo "think no such script exists for non-online), and then training using"
echo "steps/nnet2/retrain_fast.sh."
echo "e.g.: $0 ../../swbd/s5b/exp/nnet2_online/nnet_gpu_online exp/nnet2_swbd_online/nnet_gpu_online exp/nnet2_swbd_online/nnet_gpu_online_combined"
exit 1;
fi
src1=$1
src2=$2
dir=$3
for f in $src1/final.mdl $src1/cmvn_opts $src2/tree $src2/final.mdl; do
[ ! -f $f ] && echo "$0: expected file $f to exist" && exit 1;
done
mkdir -p $dir/log
info=$dir/nnet_info
nnet-am-info $src1/final.mdl >$info
nc=$(grep num-components $info | awk '{print $2}');
if grep SumGroupComponent $info >/dev/null; then
nc_truncate=$[$nc-3] # we did mix-up: remove AffineComponent,
# SumGroupComponent, SoftmaxComponent
else
# we didn't mix-up:
nc_truncate=$[$nc-2] # remove AffineComponent, SoftmaxComponent
fi
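# E.g. if nnet-am-info reports num-components 20 and the model was mixed up, we
# keep components 0 through 16 and drop the final AffineComponent,
# SumGroupComponent and SoftmaxComponent.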
$cmd $dir/log/get_raw_nnet.log \
nnet-to-raw-nnet --truncate=$nc_truncate $src1/final.mdl $dir/first_nnet.raw || exit 1;
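# nnet-insert with --insert-at=0 prepends the components of first_nnet.raw at
# position 0 of the model from $src2; --randomize-next-component=false leaves
# the parameters of the following (already-trained) component unchanged.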
$cmd $dir/log/append_nnet.log \
nnet-insert --randomize-next-component=false --insert-at=0 \
$src2/final.mdl $dir/first_nnet.raw $dir/final.mdl || exit 1;
$cleanup && rm $dir/first_nnet.raw
# Copy the tree etc.,
cp $src2/tree $dir || exit 1;
# Copy feature-related things from src1 where we built the initial model.
# Note: if you've done anything like mess with the feature-extraction configs,
# or changed the feature type, you have to keep track of that yourself.
for f in final.mat cmvn_opts splice_opts; do
if [ -f $src1/$f ]; then
cp $src1/$f $dir || exit 1;
fi
done
echo "$0: created appended model in $dir"
......@@ -23,6 +23,8 @@ io_opts="-tc 5" # for jobs with a lot of I/O, limits the number running at one t
splice_width=4 # meaning +- 4 frames on each side for second LDA
random_copy=false
online_ivector_dir=
ivector_randomize_prob=0.0 # if >0.0, randomizes iVectors during training with
# this prob per iVector.
cmvn_opts= # can be used for specifying CMVN options, if feature type is not lda.
echo "$0 $@" # Print the command line for logging
......@@ -147,9 +149,9 @@ if [ ! -z "$online_ivector_dir" ]; then
ivector_dim=$(feat-to-dim scp:$online_ivector_dir/ivector_online.scp -) || exit 1;
ivectors_opt="--const-feat-dim=$ivector_dim"
ivector_period=$(cat $online_ivector_dir/ivector_period) || exit 1;
feats="$feats paste-feats --length-tolerance=$ivector_period ark:- 'ark,s,cs:utils/filter_scp.pl $sdata/JOB/utt2spk $online_ivector_dir/ivector_online.scp | subsample-feats --n=-$ivector_period scp:- ark:- |' ark:- |"
valid_feats="$valid_feats paste-feats --length-tolerance=$ivector_period ark:- 'ark,s,cs:utils/filter_scp.pl $dir/valid_uttlist $online_ivector_dir/ivector_online.scp | subsample-feats --n=-$ivector_period scp:- ark:- |' ark:- |"
train_subset_feats="$train_subset_feats paste-feats --length-tolerance=$ivector_period ark:- 'ark,s,cs:utils/filter_scp.pl $dir/train_subset_uttlist $online_ivector_dir/ivector_online.scp | subsample-feats --n=-$ivector_period scp:- ark:- |' ark:- |"
feats="$feats paste-feats --length-tolerance=$ivector_period ark:- 'ark,s,cs:utils/filter_scp.pl $sdata/JOB/utt2spk $online_ivector_dir/ivector_online.scp | subsample-feats --n=-$ivector_period scp:- ark:- | ivector-randomize --randomize-prob=$ivector_randomize_prob ark:- ark:- |' ark:- |"
valid_feats="$valid_feats paste-feats --length-tolerance=$ivector_period ark:- 'ark,s,cs:utils/filter_scp.pl $dir/valid_uttlist $online_ivector_dir/ivector_online.scp | subsample-feats --n=-$ivector_period scp:- ark:- | ivector-randomize --randomize-prob=$ivector_randomize_prob ark:- ark:- |' ark:- |"
train_subset_feats="$train_subset_feats paste-feats --length-tolerance=$ivector_period ark:- 'ark,s,cs:utils/filter_scp.pl $dir/train_subset_uttlist $online_ivector_dir/ivector_online.scp | subsample-feats --n=-$ivector_period scp:- ark:- | ivector-randomize --randomize-prob=$ivector_randomize_prob ark:- ark:- |' ark:- |"
fi
if [ $stage -le 0 ]; then
......
......@@ -19,6 +19,8 @@ num_feats=10000 # maximum number of feature files to use. Beyond a certain poin
# gets silly to use more data.
lda_dim= # This defaults to no dimension reduction.
online_ivector_dir=
ivector_randomize_prob=0.0 # if >0.0, randomizes iVectors during training with
# this prob per iVector.
ivector_dir=
cmvn_opts= # allows you to specify options for CMVN, if feature type is not lda.
......@@ -131,7 +133,7 @@ spliced_feats="$feats splice-feats --left-context=$splice_width --right-context=
if [ ! -z "$online_ivector_dir" ]; then
ivector_period=$(cat $online_ivector_dir/ivector_period) || exit 1;
# note: subsample-feats, with negative value of n, repeats each feature n times.
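# E.g. with ivector_period=10, an iVector matrix with T rows becomes one with
# 10*T rows in which each original row appears 10 times in succession, so the
# iVectors line up frame-by-frame with the main features that paste-feats
# appends them to.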
spliced_feats="$spliced_feats paste-feats --length-tolerance=$ivector_period ark:- 'ark,s,cs:utils/filter_scp.pl $sdata/JOB/utt2spk $online_ivector_dir/ivector_online.scp | subsample-feats --n=-$ivector_period scp:- ark:- |' ark:- |"
spliced_feats="$spliced_feats paste-feats --length-tolerance=$ivector_period ark:- 'ark,s,cs:utils/filter_scp.pl $sdata/JOB/utt2spk $online_ivector_dir/ivector_online.scp | subsample-feats --n=-$ivector_period scp:- ark:- | ivector-randomize --randomize-prob=$ivector_randomize_prob ark:- ark:- |' ark:- |"
ivector_dim=$(feat-to-dim scp:$online_ivector_dir/ivector_online.scp -) || exit 1;
else
ivector_dim=0
......
......@@ -45,12 +45,17 @@ for f in $online_src/conf/online_nnet2_decoding.conf $nnet_src/final.mdl; do
[ ! -f $f ] && echo "$0: no such file $f" && exit 1;
done
origdir=$dir
dir=$(readlink -f $dir) # Convert $dir to an absolute pathname, so that the
# configuration files we write will contain absolute
# pathnames.
mkdir -p $dir/conf $dir/log
cp $nnet_src/tree $dir/ || exit 1;
# There are a bunch of files that we will need to copy from $online_src, because
# we're aiming to have one self-contained directory that has everything in it.
cp -rT $online_src/ivector_extractor/ $dir/ivector_extractor
......
......@@ -9,7 +9,7 @@ LDLIBS += $(CUDA_LDLIBS)
BINFILES = online2-wav-gmm-latgen-faster apply-cmvn-online \
extend-wav-with-silence compress-uncompress-speex \
online2-wav-nnet2-latgen-faster ivector-extract-online2 \
online2-wav-dump-features
online2-wav-dump-features ivector-randomize
OBJFILES =
......
// online2bin/ivector-randomize.cc
// Copyright 2014 Johns Hopkins University (author: Daniel Povey)
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABILITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#include "base/kaldi-common.h"
#include "util/common-utils.h"
#include "matrix/kaldi-matrix.h"
#include "transform/transform-common.h"
int main(int argc, char *argv[]) {
try {
using namespace kaldi;
const char *usage =
"Copy matrices of online-estimated iVectors, but randomize them;\n"
"this is intended primarily for training the online nnet2 setup\n"
"with iVectors. For each input matrix, each row with index t is,\n"
"with probability given by the option --randomize-prob, replaced\n"
"with the contents an input row chosen randomly from the interval [t, T]\n"
"where T is the index of the last row of the matrix.\n"
"\n"
"Usage: ivector-randomize [options] <ivector-rspecifier> <ivector-wspecifier>\n"
" e.g.: ivector-randomize ark:- ark:-\n"
"See also: ivector-extract-online, ivector-extract-online2, subsample-feats\n";
int32 srand_seed = 0;
BaseFloat randomize_prob = 0.5;
ParseOptions po(usage);
po.Register("srand", &srand_seed, "Seed for random number generator");
po.Register("randomize-prob", &randomize_prob, "For each row, replace it with a "
"random row with this probability.");
po.Read(argc, argv);
if (po.NumArgs() != 2) {
po.PrintUsage();
exit(1);
}
std::string ivector_rspecifier = po.GetArg(1),
ivector_wspecifier = po.GetArg(2);
int num_done = 0;
SequentialBaseFloatMatrixReader reader(ivector_rspecifier);
BaseFloatMatrixWriter writer(ivector_wspecifier);
for (; !reader.Done(); reader.Next()) {
std::string utt = reader.Key();
const Matrix<BaseFloat> &ivectors_in = reader.Value();
int32 T = ivectors_in.NumRows(), dim = ivectors_in.NumCols();
Matrix<BaseFloat> ivectors_out(T, dim, kUndefined);
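// For each output row t, either keep the original row t or, with probability
// randomize_prob, replace it with a row chosen uniformly at random from rows
// t through T-1.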
for (int32 t = 0; t < T; t++) {
int32 t_src;
if (WithProb(randomize_prob)) t_src = RandInt(t, T-1);
else t_src = t;
ivectors_out.Row(t).CopyFromVec(ivectors_in.Row(t_src));
}
writer.Write(utt, ivectors_out);
num_done++;
}
KALDI_LOG << "Randomized " << num_done << " iVectors.";
return (num_done != 0 ? 0 : 1);
} catch(const std::exception &e) {
std::cerr << e.what();
return -1;
}
}