Commit aca2786e authored by Karel Vesely
Browse files

trunk,nnet1: fixing 'blocksoftmax' example, adding result,



git-svn-id: https://svn.code.sf.net/p/kaldi/code/trunk@5224 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
parent 55d8f863
......@@ -3,7 +3,6 @@ for x in exp/*/decode*; do [ -d $x ] && grep WER $x/wer_* | utils/best_wer.sh; d
exit 0
# Monophone, MFCC+delta+accel
%WER 8.74 [ 1095 / 12533, 143 ins, 226 del, 726 sub ] exp/mono/decode/wer_2
# MFCC+delta+accel
......@@ -164,23 +163,6 @@ exit 0
%WER 7.73 [ 969 / 12533, 74 ins, 157 del, 738 sub ] exp/nnet5e_mpe_gpu/decode_ug_epoch4/wer_9
# DNN systems (Karel - 25.9.2014)
# Per-frame cross-entropy training
%WER 1.63 [ 204 / 12533, 32 ins, 42 del, 130 sub ] exp/dnn4b_pretrain-dbn_dnn/decode/wer_3
%WER 7.77 [ 974 / 12533, 81 ins, 158 del, 735 sub ] exp/dnn4b_pretrain-dbn_dnn/decode_ug/wer_7
# Sequence-based sMBR training
%WER 1.61 [ 202 / 12533, 32 ins, 42 del, 128 sub ] exp/dnn4b_pretrain-dbn_dnn_smbr/decode_it1/wer_3
%WER 1.62 [ 203 / 12533, 33 ins, 42 del, 128 sub ] exp/dnn4b_pretrain-dbn_dnn_smbr/decode_it2/wer_3
%WER 1.63 [ 204 / 12533, 32 ins, 42 del, 130 sub ] exp/dnn4b_pretrain-dbn_dnn_smbr/decode_it3/wer_3
%WER 1.64 [ 206 / 12533, 32 ins, 42 del, 132 sub ] exp/dnn4b_pretrain-dbn_dnn_smbr/decode_it4/wer_3
%WER 1.63 [ 204 / 12533, 32 ins, 41 del, 131 sub ] exp/dnn4b_pretrain-dbn_dnn_smbr/decode_it5/wer_3
%WER 1.64 [ 206 / 12533, 20 ins, 58 del, 128 sub ] exp/dnn4b_pretrain-dbn_dnn_smbr/decode_it6/wer_5
# CNN systems (Karel - 25.9.2014)
%WER 1.89 [ 237 / 12533, 30 ins, 47 del, 160 sub ] exp/cnn4c/decode/wer_3 # per-frame training
# 2D-CNN system (from Harish Mallidi, run by Karel - 22.6.2015)
%WER 2.07 [ 260 / 12533, 32 ins, 60 del, 168 sub ] exp/cnn2d4c/decode/wer_4_0.0 # per-frame training
# Some system combination experiments.
%WER 3.18 [ 398 / 12533, 60 ins, 75 del, 263 sub ] exp/combine_1_2a/decode/wer_4
......@@ -248,10 +230,34 @@ for x in exp/nnet2_online_wsj/nnet_ms_a_smbr_0.00005/1/decode_*; do grep WER $x/
%WER 7.33 [ 919 / 12533, 80 ins, 153 del, 686 sub ] exp/nnet2_online_wsj/nnet_ms_a_smbr_0.00005/1/decode_ug_epoch3/wer_13
%WER 7.36 [ 923 / 12533, 85 ins, 148 del, 690 sub ] exp/nnet2_online_wsj/nnet_ms_a_smbr_0.00005/1/decode_ug_epoch4/wer_13
### nnet1 results ###
# DNN systems (Karel - 25.9.2014)
# Per-frame cross-entropy training
%WER 1.63 [ 204 / 12533, 32 ins, 42 del, 130 sub ] exp/dnn4b_pretrain-dbn_dnn/decode/wer_3
%WER 7.77 [ 974 / 12533, 81 ins, 158 del, 735 sub ] exp/dnn4b_pretrain-dbn_dnn/decode_ug/wer_7
# Sequence-based sMBR training
%WER 1.61 [ 202 / 12533, 32 ins, 42 del, 128 sub ] exp/dnn4b_pretrain-dbn_dnn_smbr/decode_it1/wer_3
%WER 1.62 [ 203 / 12533, 33 ins, 42 del, 128 sub ] exp/dnn4b_pretrain-dbn_dnn_smbr/decode_it2/wer_3
%WER 1.63 [ 204 / 12533, 32 ins, 42 del, 130 sub ] exp/dnn4b_pretrain-dbn_dnn_smbr/decode_it3/wer_3
%WER 1.64 [ 206 / 12533, 32 ins, 42 del, 132 sub ] exp/dnn4b_pretrain-dbn_dnn_smbr/decode_it4/wer_3
%WER 1.63 [ 204 / 12533, 32 ins, 41 del, 131 sub ] exp/dnn4b_pretrain-dbn_dnn_smbr/decode_it5/wer_3
%WER 1.64 [ 206 / 12533, 20 ins, 58 del, 128 sub ] exp/dnn4b_pretrain-dbn_dnn_smbr/decode_it6/wer_5
# CNN systems (Karel - 25.9.2014)
%WER 1.89 [ 237 / 12533, 30 ins, 47 del, 160 sub ] exp/cnn4c/decode/wer_3 # per-frame training
# 2D-CNN system (from Harish Mallidi, run by Karel - 22.6.2015)
%WER 2.07 [ 260 / 12533, 32 ins, 60 del, 168 sub ] exp/cnn2d4c/decode/wer_4_0.0 # per-frame training
# Joint training with WSJ data, FBANK+pitch features. 2 softmax layers, multitask training,
# (Karel - 10.7.2015)
%WER 1.52 [ 191 / 12533, 17 ins, 52 del, 122 sub ] exp/dnn4e-fbank_blocksoftmax/decode/wer_4_0.5
%WER 7.86 [ 985 / 12533, 84 ins, 160 del, 741 sub ] exp/dnn4e-fbank_blocksoftmax/decode_ug/wer_8_0.0
# LSTM result
for x in exp/lstm4f/decode*; do [ -d $x ] && grep WER $x/wer_* | utils/best_wer.sh; done
%WER 2.04 [ 256 / 12533, 18 ins, 60 del, 178 sub ] exp/lstm4f_c512_r200_c512_r200_lr0.0001_mmt0.9_clip50/decode/wer_4_0.5
# BLSTM result
%WER 2.09 [ 262 / 12533, 25 ins, 69 del, 168 sub ] exp/blstm4g/decode/wer_4_0.0
### nnet1 results, the end ###
......@@ -100,7 +100,7 @@ if [ $stage -le 2 ]; then
--delta-opts "--delta-order=2" --splice 5 \
--labels "scp:$dir/pasted_post.scp" --num-tgt $output_dim \
--proto-opts "--block-softmax-dims='$ali1_dim:$ali2_dim'" \
--train-tool "nnet-train-frmshuff --objective-function 'multitask,xent,$ali1_dim,$objw1,xent,$ali2_dim,$objw2'" \
--train-tool "nnet-train-frmshuff --objective-function=multitask,xent,$ali1_dim,$objw1,xent,$ali2_dim,$objw2" \
--learn-rate 0.008 \
${train_tr90_wsj} ${train}_cv10 lang-dummy ali-dummy ali-dummy $dir
# Create files used in decoding, missing due to --labels use,
......
......@@ -38,7 +38,7 @@ namespace nnet1 {
template <typename T>
inline void CountCorrectFramesWeighted(const CuArray<T> &v1,
const CuArray<T> &v2,
const VectorBase<BaseFloat> &weights,
const CuVectorBase<BaseFloat> &weights,
double *correct) {
KALDI_ASSERT(v1.Dim() == v2.Dim());
KALDI_ASSERT(v1.Dim() == weights.Dim());
......@@ -47,10 +47,12 @@ inline void CountCorrectFramesWeighted(const CuArray<T> &v1,
std::vector<T> v1_h(dim), v2_h(dim);
v1.CopyToVec(&v1_h);
v2.CopyToVec(&v2_h);
Vector<BaseFloat> w(dim);
weights.CopyToVec(&w);
// Get correct frame count (weighted),
double corr = 0.0;
for (int32 i=0; i<dim; i++) {
corr += weights(i) * (v1_h[i] == v2_h[i] ? 1.0 : 0.0);
corr += w(i) * (v1_h[i] == v2_h[i] ? 1.0 : 0.0);
}
// Return,
(*correct) = corr;
......@@ -70,9 +72,6 @@ void Xent::Eval(const VectorBase<BaseFloat> &frame_weights,
KALDI_ASSERT(KALDI_ISFINITE(net_out.Sum()));
KALDI_ASSERT(KALDI_ISFINITE(targets.Sum()));
double num_frames = frame_weights.Sum();
KALDI_ASSERT(num_frames >= 0.0);
// get frame_weights to GPU,
frame_weights_ = frame_weights;
......@@ -84,6 +83,10 @@ void Xent::Eval(const VectorBase<BaseFloat> &frame_weights,
target_sum_.AddColSumMat(1.0, targets, 0.0);
frame_weights_.MulElements(target_sum_);
// get the number of frames after the masking,
double num_frames = frame_weights_.Sum();
KALDI_ASSERT(num_frames >= 0.0);
// compute derivative wrt. activations of last layer of neurons,
*diff = net_out;
diff->AddMat(-1.0, targets);
......@@ -93,7 +96,7 @@ void Xent::Eval(const VectorBase<BaseFloat> &frame_weights,
double correct;
net_out.FindRowMaxId(&max_id_out_); // find max in nn-output
targets.FindRowMaxId(&max_id_tgt_); // find max in targets
CountCorrectFramesWeighted(max_id_out_, max_id_tgt_, frame_weights, &correct);
CountCorrectFramesWeighted(max_id_out_, max_id_tgt_, frame_weights_, &correct);
// calculate cross_entropy (in GPU),
xentropy_aux_ = net_out; // y
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment