Commit e5799601 authored by Shinji Watanabe
Browse files

Shinji Watanabe, Wed May 6 12:04:43 EDT 2015

Fixed some issues in CHiME3 examples:
1) Modified scoring functions to include 1-best ASR transcriptions
2) Enabled multi-thread options in decoding
3) Cleaned up some code


git-svn-id: https://svn.code.sf.net/p/kaldi/code/trunk@5051 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
parent 22de2537
......@@ -52,6 +52,7 @@ for task in simu real; do
echo ""
echo "-------------------"
done
echo ""
# for spreadsheet cut&paste
for task in simu real; do
rdir=$dir/decode_tgpr_5k_dt05_${task}_$enhan
......@@ -63,3 +64,13 @@ for task in simu real; do
done
cut -f 2 -d" " $dir/log/best_wer_$enhan
echo $lmw
echo "-------------------"
echo "1-best transcription"
echo "-------------------"
for task in simu real; do
rdir=$dir/decode_tgpr_5k_dt05_${task}_$enhan
cat $rdir/scoring/$lmw.tra \
| utils/int2sym.pl -f 2- $rdir/../graph_tgpr_5k/words.txt \
| sed s:\<UNK\>::g
done
......@@ -69,3 +69,13 @@ for task in simu real; do
done
cut -f 2 -d" " $dir/log/best_wer_$enhan
echo $lmw
echo "-------------------"
echo "1-best transcription"
echo "-------------------"
for task in simu real; do
rdir=$dir/decode_tgpr_5k_dt05_${task}_${enhan}_it$it
cat $rdir/scoring/$lmw.tra \
| utils/int2sym.pl -f 2- $graph_dir/words.txt \
| sed s:\<UNK\>::g
done
......@@ -77,9 +77,9 @@ data-fbank/tr05_multi_$enhan data-fbank/dt05_multi_$enhan data/lang $ali $ali_de
# decode enhan speech
utils/mkgraph.sh data/lang_test_tgpr_5k $dir $dir/graph_tgpr_5k || exit 1;
steps/nnet/decode.sh --nj 4 --acwt 0.10 --config conf/decode_dnn.config \
steps/nnet/decode.sh --nj 4 --num-threads 4 --acwt 0.10 --config conf/decode_dnn.config \
$dir/graph_tgpr_5k data-fbank/dt05_real_$enhan $dir/decode_tgpr_5k_dt05_real_$enhan &
steps/nnet/decode.sh --nj 4 --acwt 0.10 --config conf/decode_dnn.config \
steps/nnet/decode.sh --nj 4 --num-threads 4 --acwt 0.10 --config conf/decode_dnn.config \
$dir/graph_tgpr_5k data-fbank/dt05_simu_$enhan $dir/decode_tgpr_5k_dt05_simu_$enhan &
wait;
......@@ -104,10 +104,10 @@ steps/nnet/train_mpe.sh --cmd "$cuda_cmd" --num-iters 1 --acwt $acwt --do-smbr t
# Decode (reuse HCLG graph)
for ITER in 1; do
steps/nnet/decode.sh --nj 4 --cmd "$decode_cmd" --config conf/decode_dnn.config \
steps/nnet/decode.sh --nj 4 --num-threads 4 --cmd "$decode_cmd" --config conf/decode_dnn.config \
--nnet $dir/${ITER}.nnet --acwt $acwt \
exp/tri4a_dnn_tr05_multi_${enhan}/graph_tgpr_5k data-fbank/dt05_real_${enhan} $dir/decode_tgpr_5k_dt05_real_${enhan}_it${ITER} &
steps/nnet/decode.sh --nj 4 --cmd "$decode_cmd" --config conf/decode_dnn.config \
steps/nnet/decode.sh --nj 4 --num-threads 4 --cmd "$decode_cmd" --config conf/decode_dnn.config \
--nnet $dir/${ITER}.nnet --acwt $acwt \
exp/tri4a_dnn_tr05_multi_${enhan}/graph_tgpr_5k data-fbank/dt05_simu_${enhan} $dir/decode_tgpr_5k_dt05_simu_${enhan}_it${ITER} &
done
......@@ -129,19 +129,21 @@ steps/nnet/train_mpe.sh --cmd "$cuda_cmd" --num-iters 4 --acwt $acwt --do-smbr t
# Decode (reuse HCLG graph)
for ITER in 1 2 3 4; do
steps/nnet/decode.sh --nj 4 --cmd "$decode_cmd" --config conf/decode_dnn.config \
steps/nnet/decode.sh --nj 4 --num-threads 4 --cmd "$decode_cmd" --config conf/decode_dnn.config \
--nnet $dir/${ITER}.nnet --acwt $acwt \
exp/tri4a_dnn_tr05_multi_${enhan}/graph_tgpr_5k data-fbank/dt05_real_${enhan} $dir/decode_tgpr_5k_dt05_real_${enhan}_it${ITER} &
steps/nnet/decode.sh --nj 4 --cmd "$decode_cmd" --config conf/decode_dnn.config \
steps/nnet/decode.sh --nj 4 --num-threads 4 --cmd "$decode_cmd" --config conf/decode_dnn.config \
--nnet $dir/${ITER}.nnet --acwt $acwt \
exp/tri4a_dnn_tr05_multi_${enhan}/graph_tgpr_5k data-fbank/dt05_simu_${enhan} $dir/decode_tgpr_5k_dt05_simu_${enhan}_it${ITER} &
done
wait
# decoded results of enhan speech using enhan DNN AMs
local/chime3_calc_wers.sh exp/tri4a_dnn_tr05_multi_$enhan $enhan \
| tee exp/tri4a_dnn_tr05_multi_$enhan/best_wer_$enhan.result
local/chime3_calc_wers.sh exp/tri4a_dnn_tr05_multi_$enhan $enhan > exp/tri4a_dnn_tr05_multi_$enhan/best_wer_$enhan.result
head -n 10 exp/tri4a_dnn_tr05_multi_$enhan/best_wer_$enhan.result
echo "wrote the result to exp/tri4a_dnn_tr05_multi_$enhan/best_wer_$enhan.result"
# decoded results of enhan speech using enhan DNN AMs with sequence training
./local/chime3_calc_wers_smbr.sh exp/tri4a_dnn_tr05_multi_${enhan}_smbr_i1lats ${enhan} exp/tri4a_dnn_tr05_multi_${enhan}/graph_tgpr_5k \
| tee exp/tri4a_dnn_tr05_multi_${enhan}_smbr_i1lats/best_wer_${enhan}.result
> exp/tri4a_dnn_tr05_multi_${enhan}_smbr_i1lats/best_wer_${enhan}.result
head -n 10 exp/tri4a_dnn_tr05_multi_${enhan}_smbr_i1lats/best_wer_${enhan}.result
echo "wrote the result to exp/tri4a_dnn_tr05_multi_${enhan}_smbr_i1lats/best_wer_${enhan}.result"
......@@ -54,9 +54,9 @@ utils/combine_data.sh data/tr05_multi_$enhan data/tr05_simu_$enhan data/tr05_rea
utils/combine_data.sh data/dt05_multi_$enhan data/dt05_simu_$enhan data/dt05_real_$enhan
# decode enhan speech using clean AMs
steps/decode_fmllr.sh --nj 4 \
steps/decode_fmllr.sh --nj 4 --num-threads 4 \
exp/tri3b_tr05_orig_clean/graph_tgpr_5k data/dt05_real_$enhan exp/tri3b_tr05_orig_clean/decode_tgpr_5k_dt05_real_$enhan &
steps/decode_fmllr.sh --nj 4 \
steps/decode_fmllr.sh --nj 4 --num-threads 4 \
exp/tri3b_tr05_orig_clean/graph_tgpr_5k data/dt05_simu_$enhan exp/tri3b_tr05_orig_clean/decode_tgpr_5k_dt05_simu_$enhan &
# training models using enhan data
......@@ -85,15 +85,15 @@ steps/train_sat.sh \
utils/mkgraph.sh data/lang_test_tgpr_5k exp/tri3b_tr05_multi_$enhan exp/tri3b_tr05_multi_$enhan/graph_tgpr_5k || exit 1;
# decode enhan speech using enhan AMs
steps/decode_fmllr.sh --nj 4 \
steps/decode_fmllr.sh --nj 4 --num-threads 4 \
exp/tri3b_tr05_multi_$enhan/graph_tgpr_5k data/dt05_real_$enhan exp/tri3b_tr05_multi_$enhan/decode_tgpr_5k_dt05_real_$enhan &
steps/decode_fmllr.sh --nj 4 \
steps/decode_fmllr.sh --nj 4 --num-threads 4 \
exp/tri3b_tr05_multi_$enhan/graph_tgpr_5k data/dt05_simu_$enhan exp/tri3b_tr05_multi_$enhan/decode_tgpr_5k_dt05_simu_$enhan &
wait;
# decoded results of enhan speech using clean AMs
local/chime3_calc_wers.sh exp/tri3b_tr05_orig_clean $enhan \
| tee exp/tri3b_tr05_orig_clean/best_wer_$enhan.result
local/chime3_calc_wers.sh exp/tri3b_tr05_orig_clean $enhan > exp/tri3b_tr05_orig_clean/best_wer_$enhan.result
head -n 11 exp/tri3b_tr05_orig_clean/best_wer_$enhan.result
# decoded results of enhan speech using enhan AMs
local/chime3_calc_wers.sh exp/tri3b_tr05_multi_$enhan $enhan \
| tee exp/tri3b_tr05_multi_$enhan/best_wer_$enhan.result
local/chime3_calc_wers.sh exp/tri3b_tr05_multi_$enhan $enhan > exp/tri3b_tr05_multi_$enhan/best_wer_$enhan.result
head -n 11 exp/tri3b_tr05_multi_$enhan/best_wer_$enhan.result
......@@ -72,7 +72,7 @@ else
list=$list" tr05_simu_noisy dt05_simu_noisy"
fi
mfccdir=mfcc
for x in $list; do
for x in $list; do
steps/make_mfcc.sh --nj $nj \
data/$x exp/make_mfcc/$x $mfccdir || exit 1;
steps/compute_cmvn_stats.sh data/$x exp/make_mfcc/$x $mfccdir || exit 1;
......@@ -118,15 +118,15 @@ for train in tr05_multi_noisy tr05_orig_clean; do
utils/mkgraph.sh data/lang_test_tgpr_5k exp/tri3b_$train exp/tri3b_$train/graph_tgpr_5k || exit 1;
# if you want to know the result of the close-talk microphone, please try the following
# if you want to know the result of the close-talk microphone, please try the following
# decode close speech
#steps/decode_fmllr.sh --nj 4 \
#steps/decode_fmllr.sh --nj 4 --num-threads 4 \
# exp/tri3b_$train/graph_tgpr_5k data/dt05_real_close exp/tri3b_$train/decode_tgpr_5k_dt05_real_close &
# decode noisy speech
steps/decode_fmllr.sh --nj 4 \
steps/decode_fmllr.sh --nj 4 --num-threads 4 \
exp/tri3b_$train/graph_tgpr_5k data/dt05_real_noisy exp/tri3b_$train/decode_tgpr_5k_dt05_real_noisy &
# decode simu speech
steps/decode_fmllr.sh --nj 4 \
steps/decode_fmllr.sh --nj 4 --num-threads 4 \
exp/tri3b_$train/graph_tgpr_5k data/dt05_simu_noisy exp/tri3b_$train/decode_tgpr_5k_dt05_simu_noisy &
done
wait
......@@ -134,6 +134,6 @@ wait
# get the best scores
#for train in tr05_multi_noisy tr05_real_noisy tr05_simu_noisy tr05_orig_clean; do
for train in tr05_multi_noisy tr05_orig_clean; do
local/chime3_calc_wers.sh exp/tri3b_$train noisy \
| tee exp/tri3b_$train/best_wer_noisy.result
local/chime3_calc_wers.sh exp/tri3b_$train noisy > exp/tri3b_$train/best_wer_noisy.result
head -n 12 exp/tri3b_$train/best_wer_noisy.result
done
......@@ -56,7 +56,7 @@ cat $dir/cmudict/cmudict.0.7a.symbols | perl -ane 's:\r::; print;' | \
chop; m:^([^\d]+)(\d*)$: || die "Bad phone $_";
$phones_of{$1} .= "$_ "; }
foreach $list (values %phones_of) {print $list . "\n"; } ' \
> $dir/nonsilence_phones.txt || exit 1;
| sort > $dir/nonsilence_phones.txt || exit 1;
# A few extra questions that will be added to those obtained by automatically clustering
# the "real" phones. These ask about stress; there's also one for silence.
......
export KALDI_ROOT=`pwd`/../../..
export PATH=$PWD/utils/:$KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/src/fstbin/:$KALDI_ROOT/src/gmmbin/:$KALDI_ROOT/src/featbin/:$KALDI_ROOT/src/lm/:$KALDI_ROOT/src/sgmmbin/:$KALDI_ROOT/src/sgmm2bin/:$KALDI_ROOT/src/fgmmbin/:$KALDI_ROOT/src/latbin/:$KALDI_ROOT/src/nnetbin:$KALDI_ROOT/src/nnet2bin/:$KALDI_ROOT/src/kwsbin:$PWD:$PATH
export PATH=$PWD/utils/:$KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/src/fstbin/:$KALDI_ROOT/src/gmmbin/:$KALDI_ROOT/src/featbin/:$KALDI_ROOT/src/lm/:$KALDI_ROOT/src/sgmmbin/:$KALDI_ROOT/src/sgmm2bin/:$KALDI_ROOT/src/fgmmbin/:$KALDI_ROOT/src/latbin/:$KALDI_ROOT/src/nnetbin:$KALDI_ROOT/src/nnet2bin/:$KALDI_ROOT/src/kwsbin:$KALDI_ROOT/tools/irstlm/bin/:$KALDI_ROOT/tools/kaldi_lm/:$PWD:$PATH
export LC_ALL=C
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment