Commit 38ab1f44 authored by Shinji Watanabe

Modified AMI recipe:

1) Added DNN script
2) Fixed some bugs
3) Modified scoring functions (switch to standard Kaldi scoring for the ihm condition; sketched below)



git-svn-id: https://svn.code.sf.net/p/kaldi/code/trunk@5015 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
parent f3209723
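
Item 3 amounts to dispatching on the mic condition encoded in the data path. Below is a minimal sketch of that dispatch, not the committed code itself; it reuses the variables the new local/score.sh hunk further down already defines ($data, $asclite, $orig_args) and assumes the AMI directory layout data/{ihm,sdm*,mdm*}/<set>.

# Sketch only: pick the scorer from the second path component of $data.
mic=$(echo $data | awk -F '/' '{print $2}')
case $mic in
  ihm)        eval steps/score_kaldi.sh $orig_args ;;                          # close-talk: standard Kaldi scoring
  sdm*|mdm*)  eval local/score_asclite.sh --asclite $asclite $orig_args ;;     # overlapped speech: asclite
  *)          echo "no ihm/sdm/mdm directory found in $data"; exit 1 ;;
esac
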
......@@ -15,3 +15,4 @@
export train_cmd=run.pl
export decode_cmd=run.pl
export highmem_cmd=run.pl
export cuda_cmd=run.pl
\ No newline at end of file
......@@ -103,7 +103,7 @@ if [ ! -z "$fisher" ]; then
&& exit 1;
mkdir -p $dir/fisher
find $fisher -path '*/trans/*fe*.txt' -exec cat {} \; | grep -v ^# | grep -v ^$ \
find $fisher -follow -path '*/trans/*fe*.txt' -exec cat {} \; | grep -v ^# | grep -v ^$ \
| cut -d' ' -f4- | gzip -c > $dir/fisher/text0.gz
gunzip -c $dir/fisher/text0.gz | fisher_map_words.pl \
| gzip -c > $dir/fisher/text1.gz
......
#!/bin/bash -u
. ./cmd.sh
. ./path.sh
# DNN training. This script is based on egs/swbd/s5b/local/run_dnn.sh
# Shinji Watanabe
if [ $# -ne 1 ]; then
printf "\nUSAGE: %s <mic condition(ihm|sdm|mdm)>\n\n" `basename $0`
exit 1;
fi
mic=$1
final_lm=`cat data/local/lm/final_lm`
LM=$final_lm.pr1-7
for lm_suffix in $LM; do
# Config:
gmmdir=exp/$mic/tri4a
graph_dir=exp/$mic/tri4a/graph_${lm_suffix}
data_fmllr=data-fmllr-tri4
stage=0 # resume training with --stage=N
# End of config.
. utils/parse_options.sh || exit 1;
#
if [ $stage -le 0 ]; then
# Store fMLLR features, so we can train on them easily,
# eval
dir=$data_fmllr/$mic/eval
steps/nnet/make_fmllr_feats.sh --nj 1 --cmd "$train_cmd" \
--transform-dir $gmmdir/decode_eval_${lm_suffix} \
$dir data/$mic/eval $gmmdir $dir/log $dir/data || exit 1
# dev
dir=$data_fmllr/$mic/dev
steps/nnet/make_fmllr_feats.sh --nj 1 --cmd "$train_cmd" \
--transform-dir $gmmdir/decode_dev_${lm_suffix} \
$dir data/$mic/dev $gmmdir $dir/log $dir/data || exit 1
# train
dir=$data_fmllr/$mic/train
steps/nnet/make_fmllr_feats.sh --nj 1 --cmd "$train_cmd" \
--transform-dir ${gmmdir}_ali \
$dir data/$mic/train $gmmdir $dir/log $dir/data || exit 1
# split the data : 90% train 10% cross-validation (held-out)
utils/subset_data_dir_tr_cv.sh $dir ${dir}_tr90 ${dir}_cv10 || exit 1
fi
if [ $stage -le 1 ]; then
# Pre-train DBN, i.e. a stack of RBMs
dir=exp/$mic/dnn4_pretrain-dbn
(tail --pid=$$ -F $dir/log/pretrain_dbn.log 2>/dev/null)& # forward log
$cuda_cmd $dir/log/pretrain_dbn.log \
steps/nnet/pretrain_dbn.sh --rbm-iter 1 $data_fmllr/$mic/train $dir || exit 1;
fi
if [ $stage -le 2 ]; then
# Train the DNN optimizing per-frame cross-entropy.
dir=exp/$mic/dnn4_pretrain-dbn_dnn
ali=${gmmdir}_ali
feature_transform=exp/$mic/dnn4_pretrain-dbn/final.feature_transform
dbn=exp/$mic/dnn4_pretrain-dbn/6.dbn
(tail --pid=$$ -F $dir/log/train_nnet.log 2>/dev/null)& # forward log
# Train
$cuda_cmd $dir/log/train_nnet.log \
steps/nnet/train.sh --feature-transform $feature_transform --dbn $dbn --hid-layers 0 --learn-rate 0.008 \
$data_fmllr/$mic/train_tr90 $data_fmllr/$mic/train_cv10 data/lang $ali $ali $dir || exit 1;
# Decode (reuse HCLG graph)
steps/nnet/decode.sh --nj 6 --cmd "$decode_cmd" --config conf/decode_dnn.conf --acwt 0.1 \
--num-threads 3 --parallel-opts "-pe smp 4" \
$graph_dir $data_fmllr/$mic/dev $dir/decode_dev_${lm_suffix} || exit 1;
steps/nnet/decode.sh --nj 6 --cmd "$decode_cmd" --config conf/decode_dnn.conf --acwt 0.1 \
--num-threads 3 --parallel-opts "-pe smp 4" \
$graph_dir $data_fmllr/$mic/eval $dir/decode_eval_${lm_suffix} || exit 1;
fi
# Sequence training using the sMBR criterion; we do stochastic GD
# with per-utterance updates, using the usually good acwt of 0.1.
# Lattices are re-generated after the 1st epoch, to get faster convergence.
dir=exp/$mic/dnn4_pretrain-dbn_dnn_smbr
srcdir=exp/$mic/dnn4_pretrain-dbn_dnn
acwt=0.1
if [ $stage -le 3 ]; then
# First we generate lattices and alignments:
steps/nnet/align.sh --nj 6 --cmd "$train_cmd" \
$data_fmllr/$mic/train data/lang $srcdir ${srcdir}_ali || exit 1;
steps/nnet/make_denlats.sh --nj 6 --cmd "$decode_cmd" --config conf/decode_dnn.conf \
--acwt $acwt $data_fmllr/$mic/train data/lang $srcdir ${srcdir}_denlats || exit 1;
fi
if [ $stage -le 4 ]; then
# Re-train the DNN by 1 iteration of sMBR
steps/nnet/train_mpe.sh --cmd "$cuda_cmd" --num-iters 1 --acwt $acwt --do-smbr true \
$data_fmllr/$mic/train data/lang $srcdir ${srcdir}_ali ${srcdir}_denlats $dir || exit 1
# Decode (reuse HCLG graph)
for ITER in 1; do
steps/nnet/decode.sh --nj 6 --cmd "$decode_cmd" --config conf/decode_dnn.conf \
--num-threads 3 --parallel-opts "-pe smp 4" \
--nnet $dir/${ITER}.nnet --acwt $acwt \
$graph_dir $data_fmllr/$mic/dev $dir/decode_dev_${lm_suffix} || exit 1;
steps/nnet/decode.sh --nj 6 --cmd "$decode_cmd" --config conf/decode_dnn.conf \
--num-threads 3 --parallel-opts "-pe smp 4" \
--nnet $dir/${ITER}.nnet --acwt $acwt \
$graph_dir $data_fmllr/$mic/eval $dir/decode_eval_${lm_suffix} || exit 1;
done
fi
# Re-generate lattices, run 4 more sMBR iterations
dir=exp/$mic/dnn4_pretrain-dbn_dnn_smbr_i1lats
srcdir=exp/$mic/dnn4_pretrain-dbn_dnn_smbr
acwt=0.1
if [ $stage -le 5 ]; then
# First we generate lattices and alignments:
steps/nnet/align.sh --nj 6 --cmd "$train_cmd" \
$data_fmllr/$mic/train data/lang $srcdir ${srcdir}_ali || exit 1;
steps/nnet/make_denlats.sh --nj 6 --cmd "$decode_cmd" --config conf/decode_dnn.conf \
--acwt $acwt $data_fmllr/$mic/train data/lang $srcdir ${srcdir}_denlats || exit 1;
fi
if [ $stage -le 6 ]; then
# Re-train the DNN by 4 iterations of sMBR
steps/nnet/train_mpe.sh --cmd "$cuda_cmd" --num-iters 4 --acwt $acwt --do-smbr true \
$data_fmllr/$mic/train data/lang $srcdir ${srcdir}_ali ${srcdir}_denlats $dir || exit 1
# Decode (reuse HCLG graph)
for ITER in 1 2 3 4; do
steps/nnet/decode.sh --nj 6 --cmd "$decode_cmd" --config conf/decode_dnn.conf \
--num-threads 3 --parallel-opts "-pe smp 4" \
--nnet $dir/${ITER}.nnet --acwt $acwt \
$graph_dir $data_fmllr/$mic/dev $dir/decode_dev_${lm_suffix}_$ITER || exit 1;
steps/nnet/decode.sh --nj 6 --cmd "$decode_cmd" --config conf/decode_dnn.conf \
--num-threads 3 --parallel-opts "-pe smp 4" \
--nnet $dir/${ITER}.nnet --acwt $acwt \
$graph_dir $data_fmllr/$mic/eval $dir/decode_eval_${lm_suffix}_$ITER || exit 1;
done
fi
done
# Getting results [see RESULTS file]
# for x in exp/$mic/*/decode*; do [ -d $x ] && grep WER $x/wer_* | utils/best_wer.sh; done
......@@ -34,9 +34,16 @@ fi
data=$1
if [ -f $data/stm ]; then # use sclite scoring.
eval local/score_asclite.sh --asclite $asclite $orig_args
if [ `echo $data | awk -F '/' '{print $2}'` = ihm ]; then
echo "use standard scoring took for ihm (close talk)"
eval steps/score_kaldi.sh $orig_args
elif [[ `echo $data | awk -F '/' '{print $2}'` =~ sdm* ]]; then
echo "use asclite for overlapped speech sdm condition"
eval local/score_asclite.sh --asclite $asclite $orig_args
elif [ `echo $data | awk -F '/' '{print $2}'` = mdm ]; then
echo "use asclite for overlapped speech mdm condition"
eval local/score_asclite.sh --asclite $asclite $orig_args
else
echo "$data/stm does not exist: using local/score_basic.sh"
eval local/score_basic.sh $orig_args
echo "local/score.sh: no ihm/sdm/mdm directories found. AMI recipe assumes data/{ihm,sdm,md}/... "
exit 1;
fi
export KALDI_ROOT=`pwd`/../../..
export PATH=$PWD/utils/:$KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/src/fstbin/:$KALDI_ROOT/src/gmmbin/:$KALDI_ROOT/src/featbin/:$KALDI_ROOT/src/lm/:$KALDI_ROOT/src/sgmmbin/:$KALDI_ROOT/src/sgmm2bin/:$KALDI_ROOT/src/fgmmbin/:$KALDI_ROOT/src/latbin/:$KALDI_ROOT/src/nnetbin:$KALDI_ROOT/src/nnet2bin/:$KALDI_ROOT/src/kwsbin:$KALDI_ROOT/src/online2bin/:$KALDI_ROOT/src/ivectorbin/:$KALDI_ROOT/src/lmbin/:$PWD:$PATH
export LC_ALL=C
export LC_ALL=C # For expected sorting and joining behaviour
KALDI_ROOT=/gpfs/scratch/s1136550/kaldi-code
#KALDI_ROOT=/disk/data1/software/kaldi-trunk-atlas
#KALDI_ROOT=/disk/data1/pbell1/software/kaldi-trunk-mkl/
KALDISRC=$KALDI_ROOT/src
KALDIBIN=$KALDISRC/bin:$KALDISRC/featbin:$KALDISRC/fgmmbin:$KALDISRC/fstbin
KALDIBIN=$KALDIBIN:$KALDISRC/gmmbin:$KALDISRC/latbin:$KALDISRC/nnetbin
KALDIBIN=$KALDIBIN:$KALDISRC/sgmmbin
FSTBIN=$KALDI_ROOT/tools/openfst/bin
LMBIN=$KALDI_ROOT/tools/irstlm/bin
SRILM=$KALDI_ROOT/tools/srilm/bin/i686-m64
BEAMFORMIT=$KALDI_ROOT/tools/BeamformIt-3.51
#BEAMFORMIT=/disk/data1/s1136550/BeamformIt-3.51
[ -d $PWD/local ] || { echo "Error: 'local' subdirectory not found."; }
[ -d $PWD/utils ] || { echo "Error: 'utils' subdirectory not found."; }
[ -d $PWD/steps ] || { echo "Error: 'steps' subdirectory not found."; }
export kaldi_local=$PWD/local
export kaldi_utils=$PWD/utils
export kaldi_steps=$PWD/steps
SCRIPTS=$kaldi_local:$kaldi_utils:$kaldi_steps
export PATH=$PATH:$KALDIBIN:$FSTBIN:$LMBIN:$SCRIPTS:$BEAMFORMIT:$SRILM
#CUDA_VER='cuda-5.0.35'
#export PATH=$PATH:/opt/$CUDA_VER/bin
#export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/$CUDA_VER/lib64:/opt/$CUDA_VER/lib
export PATH=$PATH:$LMBIN:$BEAMFORMIT:$SRILM
......@@ -26,7 +26,6 @@ norm_vars=false
local/ami_download.sh ihm $AMI_DIR || exit 1;
#2) Data preparation
local/ami_text_prep.sh $AMI_DIR
local/ami_ihm_data_prep.sh $AMI_DIR || exit 1;
......@@ -43,7 +42,7 @@ local/ami_train_lms.sh --fisher $FISHER_TRANS data/ihm/train/text data/ihm/dev/t
final_lm=`cat data/local/lm/final_lm`
LM=$final_lm.pr1-7
nj=16
nj=30
prune-lm --threshold=1e-7 data/local/lm/$final_lm.gz /dev/stdout | \
gzip -c > data/local/lm/$LM.gz
......@@ -75,10 +74,10 @@ wait;
for dset in train eval dev; do utils/fix_data_dir.sh data/$mic/$dset; done
# 4) Train systems
nj=16
nj=30
mkdir -p exp/$mic/mono
steps/train_mono.sh --nj $nj --cmd "$train_cmd" --feat-dim 39 --norm-vars $norm_vars \
steps/train_mono.sh --nj $nj --cmd "$train_cmd" --norm-vars $norm_vars \
data/$mic/train data/lang exp/$mic/mono >& exp/$mic/mono/train_mono.log || exit 1;
mkdir -p exp/$mic/mono_ali
......@@ -100,7 +99,7 @@ for dset in train eval dev; do utils/fix_data_dir.sh data/$mic/$dset; done
>& exp/$mic/tri2a/train.log || exit 1;
for lm_suffix in $LM; do
# (
(
graph_dir=exp/$mic/tri2a/graph_${lm_suffix}
$highmem_cmd $graph_dir/mkgraph.log \
utils/mkgraph.sh data/lang_${lm_suffix} exp/$mic/tri2a $graph_dir
......@@ -111,7 +110,7 @@ for dset in train eval dev; do utils/fix_data_dir.sh data/$mic/$dset; done
steps/decode.sh --nj $nj --cmd "$decode_cmd" --config conf/decode.conf \
$graph_dir data/$mic/eval exp/$mic/tri2a/decode_eval_${lm_suffix}
# ) &
) &
done
mkdir -p exp/$mic/tri2a_ali
......@@ -136,7 +135,7 @@ for lm_suffix in $LM; do
steps/decode.sh --nj $nj --cmd "$decode_cmd" --config conf/decode.conf \
$graph_dir data/$mic/eval exp/$mic/tri3a/decode_eval_${lm_suffix}
)
) &
done
# Train tri4a, which is LDA+MLLT+SAT
......@@ -159,9 +158,9 @@ for lm_suffix in $LM; do
steps/decode_fmllr.sh --nj $nj --cmd "$decode_cmd" --config conf/decode.conf \
$graph_dir data/$mic/eval exp/$mic/tri4a/decode_eval_${lm_suffix}
)
) &
done
exit;
# MMI training starting from the LDA+MLLT+SAT systems
steps/align_fmllr.sh --nj $nj --cmd "$train_cmd" \
data/$mic/train data/lang exp/$mic/tri4a exp/$mic/tri4a_ali || exit 1
......@@ -183,22 +182,18 @@ for lm_suffix in $LM; do
graph_dir=exp/$mic/tri4a/graph_${lm_suffix}
for i in `seq 1 4`; do
decode_dir=exp/$mic/tri4a_mmi_b0.1/decode_dev_${i}.mdl_${lm_suffix}
decode_dir=exp/$mic/tri4a_mmi_b0.1/decode_dev_${i}.mdl_${lm_suffix}
steps/decode.sh --nj $nj --cmd "$decode_cmd" --config conf/decode.conf \
--transform-dir exp/$mic/tri4a/decode_dev_${lm_suffix} --iter $i \
$graph_dir data/$mic/dev $decode_dir
decode_dir=exp/$mic/tri4a_mmi_b0.1/decode_eval_${i}.mdl_${lm_suffix}
steps/decode.sh --nj $nj --cmd "$decode_cmd" --config conf/decode.conf \
--transform-dir exp/$mic/tri4a/decode_eval_${lm_suffix} --iter $i \
$graph_dir data/$mic/eval $decode_dir
done
i=3 # simply assumed
decode_dir=exp/$mic/tri4a_mmi_b0.1/decode_eval_${i}.mdl_${lm_suffix}
steps/decode.sh --nj $nj --cmd "$decode_cmd" --config conf/decode.conf \
--transform-dir exp/$mic/tri4a/decode_eval_${lm_suffix} --iter $i \
$graph_dir data/$mic/eval $decode_dir
)
) &
done
# here goes hybrid stuff
# in the ASRU paper we used different python nnet code, so someone needs to copy&adjust nnet or nnet2 switchboard commands
# DNN training. This script is based on egs/swbd/s5b/local/run_dnn.sh
# Some of them may be out of date.
local/run_dnn.sh $mic
......@@ -118,7 +118,7 @@ done
# skip SAT, and build MMI models
steps/make_denlats.sh --nj $nj --cmd "$decode_cmd" --config conf/decode.config \
steps/make_denlats.sh --nj $nj --cmd "$decode_cmd" --config conf/decode.conf \
data/$mic/train data/lang exp/$mic/tri3a exp/$mic/tri3a_denlats || exit 1;
......@@ -151,6 +151,8 @@ for lm_suffix in $LM; do
)
done
# here goes hybrid stuff
# in the ASRU paper we used different python nnet code, so someone needs to copy&adjust nnet or nnet2 switchboard commands
# DNN training. This script is based on egs/swbd/s5b/local/run_dnn.sh
# Some of them may be out of date.
local/run_dnn.sh $mic
......@@ -29,7 +29,7 @@ LM=$final_lm.pr1-7
DEV_SPK=$((`cut -d" " -f2 data/$mic/dev/utt2spk | sort | uniq -c | wc -l`))
EVAL_SPK=$((`cut -d" " -f2 data/$mic/eval/utt2spk | sort | uniq -c | wc -l`))
echo $DEV_SPK $EVAL_SPK
nj=16
nj=30
#GENERATE FEATS
mfccdir=mfcc_$mic
......@@ -51,7 +51,7 @@ for dset in train eval dev; do utils/fix_data_dir.sh data/$mic/$dset; done
# TRAIN THE MODELS
mkdir -p exp/$mic/mono
steps/train_mono.sh --nj $nj --cmd "$train_cmd" --feat-dim 39 \
steps/train_mono.sh --nj $nj --cmd "$train_cmd" \
data/$mic/train data/lang exp/$mic/mono >& exp/$mic/mono/train_mono.log || exit 1;
mkdir -p exp/$mic/mono_ali
......@@ -83,7 +83,7 @@ for dset in train eval dev; do utils/fix_data_dir.sh data/$mic/$dset; done
steps/decode.sh --nj $EVAL_SPK --cmd "$decode_cmd" --config conf/decode.conf \
$graph_dir data/$mic/eval exp/$mic/tri2a/decode_eval_${lm_suffix}
)
) &
done
#THE TARGET LDA+MLLT+SAT+BMMI PART GOES HERE:
......@@ -109,11 +109,11 @@ for lm_suffix in $LM; do
steps/decode.sh --nj $EVAL_SPK --cmd "$decode_cmd" --config conf/decode.conf \
$graph_dir data/$mic/eval exp/$mic/tri3a/decode_eval_${lm_suffix}
)
) &
done
# skip SAT, and build MMI models
steps/make_denlats.sh --nj $nj --cmd "$decode_cmd" --config conf/decode.config \
steps/make_denlats.sh --nj $nj --cmd "$decode_cmd" --config conf/decode.conf \
data/$mic/train data/lang exp/$mic/tri3a exp/$mic/tri3a_denlats || exit 1;
......@@ -209,6 +209,8 @@ for lm_suffix in $LM; do
)&
done
# here goes hybrid stuff
# in the ASRU paper we used different python nnet code, so someone needs to copy&adjust nnet or nnet2 switchboard commands
# DNN training. This script is based on egs/swbd/s5b/local/run_dnn.sh
# Some of them may be out of date.
local/run_dnn.sh $mic