Commit 73abbf68 authored by Ho Yin Chan's avatar Ho Yin Chan
Browse files

trunk:egs/hkust update on some recent experiments

git-svn-id: https://svn.code.sf.net/p/kaldi/code/trunk@3036 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
parent 8beace2a
......@@ -43,8 +43,13 @@ nnet_8m_6l/decode_eval_iter290/cer_10:%CER 26.37 [ 1994 / 7562, 410 ins, 572 del
nnet_8m_6l/decode_eval/cer_10:%CER 25.55 [ 1932 / 7562, 405 ins, 549 del, 978 sub ] # 6 layers neural network
nnet_8m_6l/decode_wide_eval/cer_10:%CER 24.13 [ 1825 / 7562, 384 ins, 535 del, 906 sub ] # wider decoding beam width and lattice beam
nnet_tanh_6l/decode_eval/cer_10:%CER 21.34 [ 1614 / 7562, 369 ins, 487 del, 758 sub ] # 6 layers neural network (nnet2 script, 1024 neurons)
nnet_tanh_6l/decode_wide_eval/cer_10:%CER 21.22 [ 1605 / 7562, 365 ins, 485 del, 755 sub ] # wider decoding beam width and lattice beam
tri5a_pretrain-dbn_dnn/decode/cer_10:%CER 20.48 [ 1549 / 7562, 383 ins, 468 del, 698 sub ] # pretrained RBM, cross entropy trained DNN
nnet_4m_3l/decode_eval/cer_10:%CER 22.38 [ 1692 / 7562, 372 ins, 510 del, 810 sub ] # 4 layers neural network
nnet_4m_3l/decode_wide_eval/cer_10:%CER 22.16 [ 1676 / 7562, 365 ins, 505 del, 806 sub ] # wider decoding beam width and lattice beam
tri5a_pretrain-dbn_dnn/decode/cer_10:%CER 20.48 [ 1549 / 7562, 383 ins, 468 del, 698 sub ] # 6 layers DNN - pretrained RBM, cross entropy trained DNN
tri5a_pretrain-dbn_dnn_smbr/decode_it1/cer_10:%CER 18.73 [ 1416 / 7562, 306 ins, 453 del, 657 sub ] # sMBR trained DNN
tri5a_pretrain-dbn_dnn_smbr/decode_it2/cer_10:%CER 18.73 [ 1416 / 7562, 310 ins, 446 del, 660 sub ]
tri5a_pretrain-dbn_dnn_smbr/decode_it3/cer_10:%CER 18.62 [ 1408 / 7562, 313 ins, 446 del, 649 sub ]
......@@ -96,6 +101,10 @@ nnet_8m_6l/decode_eval_closelm_iter290/cer_10:%CER 20.40 [ 1543 / 7562, 323 ins,
nnet_8m_6l/decode_eval_closelm/cer_10:%CER 20.68 [ 1564 / 7562, 351 ins, 483 del, 730 sub ]
nnet_8m_6l/decode_wide_eval_closelm/cer_10:%CER 17.87 [ 1351 / 7562, 343 ins, 453 del, 555 sub ]
nnet_tanh_6l/decode_eval_closelm/cer_10:%CER 17.10 [ 1293 / 7562, 337 ins, 448 del, 508 sub ]
nnet_tanh_6l/decode_wide_eval_closelm/cer_10:%CER 17.15 [ 1297 / 7562, 336 ins, 452 del, 509 sub ]
nnet_4m_3l/decode_eval_closelm/cer_10:%CER 17.15 [ 1297 / 7562, 335 ins, 439 del, 523 sub ]
nnet_4m_3l/decode_wide_eval_closelm/cer_10:%CER 17.02 [ 1287 / 7562, 330 ins, 436 del, 521 sub ]
tri5a_pretrain-dbn_dnn/decode_closelm/cer_10:%CER 16.54 [ 1251 / 7562, 346 ins, 413 del, 492 sub ]
tri5a_pretrain-dbn_dnn_smbr/decode_closelm_it1/cer_10:%CER 15.31 [ 1158 / 7562, 280 ins, 410 del, 468 sub ]
......
#!/bin/bash
# Train a 6-hidden-layer tanh neural network with Dan Povey's nnet2 recipe,
# then decode the eval set against both language-model graphs (open LM and
# close LM), at the default and the widened beam settings, and score all four.
#
# Prepared by Ricky Chan Ho Yin (Hong Kong University of Science and Technology)
#
# Apache License, 2.0
. cmd.sh
. path.sh
ulimit -u 10000
(
nnet_dir=exp/nnet_tanh_6l

# Train on top of the fMLLR alignments from the tri5a SAT system.
steps/nnet2/train_tanh.sh \
  --mix-up 8000 \
  --initial-learning-rate 0.01 --final-learning-rate 0.001 \
  --num-hidden-layers 6 --hidden-layer-dim 1024 \
  --cmd "$decode_cmd" \
  data/train data/lang exp/tri5a_ali_dt100k $nnet_dir || exit 1

# Decode all four graph/beam combinations in parallel.
# lm suffix "" -> open-LM graph; "_closelm" -> close-LM graph.
for lm in "" _closelm; do
  steps/decode_nnet_cpu.sh --cmd "$decode_cmd" --nj 2 \
    --transform-dir exp/tri5a/decode_eval$lm \
    exp/tri5a/graph$lm data/eval $nnet_dir/decode_eval$lm &
  steps/decode_nnet_cpu.sh --cmd "$decode_cmd" --nj 2 \
    --config conf/decode_wide.config \
    --transform-dir exp/tri5a/decode_eval$lm \
    exp/tri5a/graph$lm data/eval $nnet_dir/decode_wide_eval$lm &
done
wait

# Score each decode directory (each writes into its own directory,
# so the ordering of the score.sh calls does not matter).
for lm in "" _closelm; do
  local/ext/score.sh data/eval exp/tri5a/graph$lm $nnet_dir/decode_eval$lm
  local/ext/score.sh data/eval exp/tri5a/graph$lm $nnet_dir/decode_wide_eval$lm
done
)
......@@ -6,6 +6,8 @@
. cmd.sh
##### Data Preparation Stage #####
mkdir data data/train data/eval
### Data preparation - Training data, evaluation data. Please refer http://kaldi.sourceforge.net/data_prep.html as well
......@@ -29,7 +31,9 @@ steps/compute_cmvn_stats.sh data/eval exp/make_mfcc/eval $mfccdir || exit 1;
utils/fix_data_dir.sh data/eval
### We start acoustic model training here, build from HMM-GMM
##### We start acoustic model training here, build from GMM-HMM #####
### Mono phone training
steps/train_mono.sh --nj 20 --cmd "$train_cmd" data/train data/lang exp/mono0a || exit 1;
steps/align_si.sh --nj 30 --cmd "$train_cmd" data/train data/lang exp/mono0a exp/mono0a_ali
......@@ -126,7 +130,10 @@ steps/decode_sgmm_rescore.sh --cmd "$decode_cmd" --iter $n --transform-dir exp/t
steps/decode_sgmm_rescore.sh --cmd "$decode_cmd" --iter $n --transform-dir exp/tri5a/decode_eval data/lang_test data/eval exp/sgmm_5a/decode_eval exp/sgmm_5a_mmi_b0.1/decode_eval$n
done
### Neural Network (on top of LDA+MLLT+SAT model)
##### Neural Network (on top of LDA+MLLT+SAT model) #####
### 6 hidden layers neural network, tanh non-linearity
steps/train_nnet_cpu.sh --mix-up 8000 --initial-learning-rate 0.01 --final-learning-rate 0.001 --num-jobs-nnet 16 --num-hidden-layers 6 --num-parameters 8000000 --cmd "$decode_cmd" data/train data/lang exp/tri5a exp/nnet_8m_6l
# decoding on final model for NN
......@@ -143,11 +150,23 @@ done
steps/decode_nnet_cpu.sh --cmd "$decode_cmd" --nj 2 --config conf/decode_wide.config --transform-dir exp/tri5a/decode_eval exp/tri5a/graph data/eval exp/nnet_8m_6l/decode_wide_eval
steps/decode_nnet_cpu.sh --cmd "$decode_cmd" --nj 2 --config conf/decode_wide.config --transform-dir exp/tri5a/decode_eval_closelm exp/tri5a/graph_closelm data/eval exp/nnet_8m_6l/decode_wide_eval_closelm
# GPU based DNN training, this was run on CentOS 6.4 with CUDA 5.0
# 6 layers DNN pretrained with restricted boltzmann machine, frame level cross entropy training, sequence discriminative training with sMBR criterion
# alternative neural network training script (6 hidden layers, 1024 neurons)
local/run_tanh.sh
### Half the parameters compared to the previous network => 3 hidden layers, 4 million parameters
steps/train_nnet_cpu.sh --mix-up 8000 --initial-learning-rate 0.01 --final-learning-rate 0.001 --num-jobs-nnet 16 --num-hidden-layers 3 --num-parameters 4000000 --cmd "$decode_cmd" data/train data/lang exp/tri5a exp/nnet_4m_3l
steps/decode_nnet_cpu.sh --cmd "$decode_cmd" --nj 2 --transform-dir exp/tri5a/decode_eval exp/tri5a/graph data/eval exp/nnet_4m_3l/decode_eval
steps/decode_nnet_cpu.sh --cmd "$decode_cmd" --nj 2 --transform-dir exp/tri5a/decode_eval_closelm exp/tri5a/graph_closelm data/eval exp/nnet_4m_3l/decode_eval_closelm
steps/decode_nnet_cpu.sh --cmd "$decode_cmd" --nj 2 --config conf/decode_wide.config --transform-dir exp/tri5a/decode_eval exp/tri5a/graph data/eval exp/nnet_4m_3l/decode_wide_eval
steps/decode_nnet_cpu.sh --cmd "$decode_cmd" --nj 2 --config conf/decode_wide.config --transform-dir exp/tri5a/decode_eval_closelm exp/tri5a/graph_closelm data/eval exp/nnet_4m_3l/decode_wide_eval_closelm
## GPU based DNN training, this was run on CentOS 6.4 with CUDA 5.0
## 6 layers DNN pretrained with restricted boltzmann machine, frame level cross entropy training, sequence discriminative training with sMBR criterion
local/run_dnn.sh
# decoding was run by CPUs
# decoding using DNN with cross-entropy training
## decoding was run by CPUs
## decoding using DNN with cross-entropy training
dir=exp/tri5a_pretrain-dbn_dnn
steps/decode_nnet.sh --nj 2 --cmd "$decode_cmd" --config conf/decode_dnn.config --acwt 0.1 exp/tri5a/graph data-fmllr-tri5a/test $dir/decode || exit 1;
steps/decode_nnet.sh --nj 2 --cmd "$decode_cmd" --config conf/decode_dnn.config --acwt 0.1 exp/tri5a/graph_closelm data-fmllr-tri5a/test $dir/decode_closelm || exit 1;
......@@ -160,6 +179,7 @@ done
### Scoring ###
# GMM-HMM
local/ext/score.sh data/eval exp/tri1/graph exp/tri1/decode_eval
local/ext/score.sh data/eval exp/tri1/graph_closelm exp/tri1/decode_eval_closelm
......@@ -195,6 +215,7 @@ for n in 1 2 3 4; do
local/ext/score.sh data/eval exp/sgmm_5a/graph_closelm exp/sgmm_5a_mmi_b0.1/decode_eval_closelm$n;
done
# DNN-HMM
local/ext/score.sh data/eval exp/tri5a/graph exp/nnet_8m_6l/decode_eval
local/ext/score.sh data/eval exp/tri5a/graph_closelm exp/nnet_8m_6l/decode_eval_closelm
......@@ -206,6 +227,11 @@ done
local/ext/score.sh data/eval exp/tri5a/graph exp/nnet_8m_6l/decode_wide_eval
local/ext/score.sh data/eval exp/tri5a/graph_closelm exp/nnet_8m_6l/decode_wide_eval_closelm
local/ext/score.sh data/eval exp/tri5a/graph exp/nnet_4m_3l/decode_eval
local/ext/score.sh data/eval exp/tri5a/graph_closelm exp/nnet_4m_3l/decode_eval_closelm
local/ext/score.sh data/eval exp/tri5a/graph exp/nnet_4m_3l/decode_wide_eval
local/ext/score.sh data/eval exp/tri5a/graph_closelm exp/nnet_4m_3l/decode_wide_eval_closelm
local/ext/score.sh data/eval exp/tri5a/graph exp/tri5a_pretrain-dbn_dnn/decode
local/ext/score.sh data/eval exp/tri5a/graph_closelm exp/tri5a_pretrain-dbn_dnn/decode_closelm
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment