Commit 5f397825 authored by Dan Povey

sandbox/dan2: committing a bunch of changes to neural-network training setup which are not yet ready to merge to trunk.

git-svn-id: https://svn.code.sf.net/p/kaldi/code/sandbox/dan2@2777 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
parent 446c428b
@@ -138,3 +138,4 @@ exit 0
%WER 1.60 [ 200 / 12533, 26 ins, 34 del, 140 sub ] exp/combine_sgmm2x_4a_3b/decode/wer_2
%WER 1.51 [ 189 / 12533, 23 ins, 34 del, 132 sub ] exp/combine_sgmm2x_4a_3b_fmmic5/decode/wer_4
%WER 1.48 [ 186 / 12533, 24 ins, 29 del, 133 sub ] exp/combine_sgmm2x_4a_mmi_3b_fmmic5/decode/wer_4
#!/bin/bash
#
# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0.
# Takes no arguments.
tmpdir=data/local/tmp
[ ! -f $tmpdir/G.txt ] && echo "No such file $tmpdir/G.txt" && exit 1;
. ./path.sh || exit 1; # for KALDI_ROOT
cp -rT data/lang data/lang_ug
rm -rf data/lang_ug/tmp
cat data/train/text | \
perl -e 'while(<>) { @A = split; shift @A; foreach $w(@A) { $tot_count++; $count{$w}++; } $n_sent++; }
$tot_count += $n_sent;
foreach $k (keys %count) { $p = $count{$k} / $tot_count; $cost = -log($p); print "0 0 $k $k $cost\n"; }
$final_cost = -log($n_sent / $tot_count);
print "0 $final_cost\n"; ' | \
fstcompile --isymbols=data/lang/words.txt --osymbols=data/lang/words.txt --keep_isymbols=false \
--keep_osymbols=false > data/lang_ug/G.fst || exit 1;
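# For illustration (numbers made up), the perl block above prints a
# single-state FST in OpenFst text format: one self-loop arc per word, plus a
# final-state line whose weight accounts for end-of-sentence, e.g.:
#   0 0 the the 2.31
#   0 0 cat cat 4.62
#   0 1.95
# The costs are negated log-probabilities, so G is stochastic by construction
# (the checks below should confirm this).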
# Checking that G is stochastic [note: it wouldn't be for an ARPA LM]
fstisstochastic data/lang_ug/G.fst || echo Error: G is not stochastic
# Checking that G.fst is determinizable.
fstdeterminize data/lang_ug/G.fst /dev/null || echo Error determinizing G.
# Checking that L_disambig.fst is determinizable.
fstdeterminize data/lang_ug/L_disambig.fst /dev/null || echo Error determinizing L.
# Checking that disambiguated lexicon times G is determinizable
fsttablecompose data/lang_ug/L_disambig.fst data/lang_ug/G.fst | \
fstdeterminize >/dev/null || echo Error: LG is not determinizable.
# Checking that LG is stochastic:
fsttablecompose data/lang_ug/L.fst data/lang_ug/G.fst | \
fstisstochastic || echo Error: LG is not stochastic.
# Checking that L_disambig.G is stochastic:
fsttablecompose data/lang_ug/L_disambig.fst data/lang_ug/G.fst | \
fstisstochastic || echo Error: L_disambig.G is not stochastic.
echo "Succeeded preparing grammar for RM."
#!/bin/bash
# "nnet2" is the new name for what used to be called the "nnet-cpu" code, and this
# script will eventually supersede run_nnet_cpu.sh [It's Dan's version of neural
# network training].
# We start from tri3c, which is "raw-fMLLR" (a model with regular LDA+MLLT, but where
# the fMLLR is done in the space of the original features).
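# As a rough sketch (illustrative only, not run here): with raw fMLLR the
# per-speaker transform is estimated and applied in the space of the original
# features, before splicing and the LDA+MLLT projection, roughly:
#   apply-cmvn ... scp:feats.scp ark:- | \
#     transform-feats --utt2spk=ark:utt2spk ark:raw_trans.ark ark:- ark:- | \
#     splice-feats ark:- ark:- | transform-feats final.mat ark:- ark:-
# whereas conventional fMLLR applies its transform after final.mat.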
. cmd.sh
# The first training is with a small hidden-layer-dim and few epochs, just to
# get a good point to optimize from.
steps/nnet2/train_tanh.sh --num-epochs 4 --num-epochs-extra 2 --splice-width 7 \
--cleanup false \
--num-hidden-layers 3 --hidden-layer-dim 256 --add-layers-period 1 --cmd "$decode_cmd" \
data/train data/lang exp/tri3c_ali exp/nnet4c1
steps/nnet2/decode.sh --config conf/decode.config --cmd "$decode_cmd" --nj 20 \
--transform-dir exp/tri3c/decode \
exp/tri3c/graph data/test exp/nnet4c1/decode
steps/nnet2/retrain_tanh.sh --num-epochs 10 --num-epochs-extra 10 \
--initial-learning-rate 0.08 --final-learning-rate 0.008 \
--widen 400 --cmd "$decode_cmd" exp/nnet4c1/egs exp/nnet4c1 exp/nnet5c1
steps/nnet2/decode.sh --config conf/decode.config --cmd "$decode_cmd" --nj 20 \
--transform-dir exp/tri3c/decode \
exp/tri3c/graph data/test exp/nnet5c1/decode
steps/nnet2/retrain_tanh.sh --num-epochs 10 --num-epochs-extra 10 \
--mix-up 4000 --initial-learning-rate 0.08 --final-learning-rate 0.008 \
--cmd "$decode_cmd" exp/nnet4c1/egs exp/nnet5c1 exp/nnet6c1
steps/nnet2/decode.sh --config conf/decode.config --cmd "$decode_cmd" --nj 20 \
--transform-dir exp/tri3c/decode \
exp/tri3c/graph data/test exp/nnet6c1/decode
steps/nnet2/align.sh --transform-dir exp/tri3c --nj 8 \
--cmd "$decode_cmd" \
data/train data/lang exp/nnet6c1 exp/nnet6c1_ali
steps/nnet2/get_egs.sh --cmd "$decode_cmd" --splice-width 7 \
--transform-dir exp/tri3c/ \
data/train data/lang exp/nnet6c1_ali exp/nnet6c1_realigned_egs
steps/nnet2/retrain_tanh.sh --num-epochs 5 --num-epochs-extra 10 \
--initial-learning-rate 0.04 --final-learning-rate 0.008 \
--cmd "$decode_cmd" exp/nnet6c1_realigned_egs/egs exp/nnet6c1 exp/nnet7c1
steps/nnet2/decode.sh --config conf/decode.config --cmd "$decode_cmd" --nj 20 \
--transform-dir exp/tri3c/decode \
exp/tri3c/graph data/test exp/nnet7c1/decode
steps/nnet2/decode.sh --config conf/decode.config --cmd "$decode_cmd" --nj 20 \
--transform-dir exp/tri3c/decode_ug \
exp/tri3c/graph_ug data/test exp/nnet7c1/decode_ug
exit 0;
# using conf/decode.config as we need much larger beams for RM.
steps/make_denlats_nnet_cpu.sh --nj 8 \
--config conf/decode.config --transform-dir exp/tri3b_ali \
data/train data/lang exp/tri4a1_nnet exp/tri4a1_denlats
steps/train_nnet_cpu_mmi.sh --cmd "$decode_cmd" --transform-dir exp/tri3b_ali \
data/train data/lang exp/tri4a1_nnet exp/tri4a1_nnet exp/tri4a1_denlats exp/tri4a1_mmi_a
steps/decode_nnet_cpu.sh --cmd "$decode_cmd" --nj 20 \
--transform-dir exp/tri3b/decode \
exp/tri3b/graph data/test exp/tri4a1_mmi_a/decode
(
steps/train_nnet_cpu_mmi.sh --initial-learning-rate 0.0005 \
--minibatch-size 128 --cmd "$decode_cmd" --transform-dir exp/tri3b_ali \
data/train data/lang exp/tri4a1_nnet exp/tri4a1_nnet exp/tri4a1_denlats exp/tri4a1_mmi_b
steps/decode_nnet_cpu.sh --cmd "$decode_cmd" --nj 20 \
--transform-dir exp/tri3b/decode \
exp/tri3b/graph data/test exp/tri4a1_mmi_b/decode
)&
# Get WER on training data before MMI.
steps/decode_nnet_cpu.sh --cmd "$decode_cmd" --nj 8 \
--config conf/decode.config --transform-dir exp/tri3b \
exp/tri3b/graph data/train exp/tri4a1_nnet/decode_train
# Baseline WER of tri3b on the training data; we want to see how it compares to tri3b_mmi.
steps/decode.sh --cmd "$decode_cmd" --nj 8 \
--config conf/decode.config --transform-dir exp/tri3b \
exp/tri3b/graph data/train exp/tri3b/decode_train
steps/decode.sh --cmd "$decode_cmd" --nj 8 \
--config conf/decode.config --transform-dir exp/tri3b \
exp/tri3b/graph data/train exp/tri3b_mmi/decode_train
(
steps/train_nnet_cpu_mmi.sh --boost 0.1 --initial-learning-rate 0.0005 \
--minibatch-size 128 --cmd "$decode_cmd" --transform-dir exp/tri3b_ali \
data/train data/lang exp/tri4a1_nnet exp/tri4a1_nnet exp/tri4a1_denlats exp/tri4a1_mmi_c
steps/decode_nnet_cpu.sh --cmd "$decode_cmd" --nj 20 \
--transform-dir exp/tri3b/decode \
exp/tri3b/graph data/test exp/tri4a1_mmi_c/decode
# WER on training data
steps/decode_nnet_cpu.sh --cmd "$decode_cmd" --nj 8 \
--config conf/decode.config --transform-dir exp/tri3b \
exp/tri3b/graph data/train exp/tri4a1_mmi_c/decode_train
)&
(
steps/train_nnet_cpu_mmi.sh --E 0.5 --boost 0.1 --initial-learning-rate 0.0005 \
--minibatch-size 128 --cmd "$decode_cmd" --transform-dir exp/tri3b_ali \
data/train data/lang exp/tri4a1_nnet exp/tri4a1_nnet exp/tri4a1_denlats exp/tri4a1_mmi_d
steps/decode_nnet_cpu.sh --cmd "$decode_cmd" --nj 20 \
--transform-dir exp/tri3b/decode \
exp/tri3b/graph data/test exp/tri4a1_mmi_d/decode
# WER on training data
steps/decode_nnet_cpu.sh --cmd "$decode_cmd" --nj 8 \
--config conf/decode.config --transform-dir exp/tri3b \
exp/tri3b/graph data/train exp/tri4a1_mmi_d/decode_train
)&
(
steps/train_nnet_cpu_mmi.sh --E 0.5 --boost 0.1 --initial-learning-rate 0.001 \
--minibatch-size 128 --cmd "$decode_cmd" --transform-dir exp/tri3b_ali \
data/train data/lang exp/tri4a1_nnet exp/tri4a1_nnet exp/tri4a1_denlats exp/tri4a1_mmi_e
steps/decode_nnet_cpu.sh --cmd "$decode_cmd" --nj 20 \
--transform-dir exp/tri3b/decode \
exp/tri3b/graph data/test exp/tri4a1_mmi_e/decode
# WER on training data
steps/decode_nnet_cpu.sh --cmd "$decode_cmd" --nj 8 \
--config conf/decode.config --transform-dir exp/tri3b \
exp/tri3b/graph data/train exp/tri4a1_mmi_e/decode_train
)&
( # _e2 is as _e, but 2 epochs per EBW iter.
steps/train_nnet_cpu_mmi.sh --epochs-per-ebw-iter 2 --E 0.5 --boost 0.1 --initial-learning-rate 0.001 \
--minibatch-size 128 --cmd "$decode_cmd" --transform-dir exp/tri3b_ali \
data/train data/lang exp/tri4a1_nnet exp/tri4a1_nnet exp/tri4a1_denlats exp/tri4a1_mmi_e2
steps/decode_nnet_cpu.sh --cmd "$decode_cmd" --nj 20 \
--transform-dir exp/tri3b/decode \
exp/tri3b/graph data/test exp/tri4a1_mmi_e2/decode
# WER on training data
steps/decode_nnet_cpu.sh --cmd "$decode_cmd" --nj 8 \
--config conf/decode.config --transform-dir exp/tri3b \
exp/tri3b/graph data/train exp/tri4a1_mmi_e2/decode_train
)&
( # With E = 0.0 it was terrible: WER was 12.5%.
steps/train_nnet_cpu_mmi.sh --E 0.0 --boost 0.1 --initial-learning-rate 0.001 \
--minibatch-size 128 --cmd "$decode_cmd" --transform-dir exp/tri3b_ali \
data/train data/lang exp/tri4a1_nnet exp/tri4a1_nnet exp/tri4a1_denlats exp/tri4a1_mmi_f
steps/decode_nnet_cpu.sh --cmd "$decode_cmd" --nj 20 \
--transform-dir exp/tri3b/decode \
exp/tri3b/graph data/test exp/tri4a1_mmi_f/decode
# WER on training data
steps/decode_nnet_cpu.sh --cmd "$decode_cmd" --nj 8 \
--config conf/decode.config --transform-dir exp/tri3b \
exp/tri3b/graph data/train exp/tri4a1_mmi_f/decode_train
)&
(
steps/train_nnet_cpu_mmi.sh --E 0.25 --boost 0.1 --initial-learning-rate 0.001 \
--minibatch-size 128 --cmd "$decode_cmd" --transform-dir exp/tri3b_ali \
data/train data/lang exp/tri4a1_nnet exp/tri4a1_nnet exp/tri4a1_denlats exp/tri4a1_mmi_g
steps/decode_nnet_cpu.sh --cmd "$decode_cmd" --nj 20 \
--transform-dir exp/tri3b/decode \
exp/tri3b/graph data/test exp/tri4a1_mmi_g/decode
# WER on training data
steps/decode_nnet_cpu.sh --cmd "$decode_cmd" --nj 8 \
--config conf/decode.config --transform-dir exp/tri3b \
exp/tri3b/graph data/train exp/tri4a1_mmi_g/decode_train
)&
#!/bin/bash
. cmd.sh
steps/align_raw_fmllr.sh --nj 8 --cmd "$train_cmd" --use-graphs true \
data/train data/lang exp/tri2b exp/tri2b_ali_raw
steps/train_raw_sat.sh 1800 9000 data/train data/lang exp/tri2b_ali_raw exp/tri3c || exit 1;
utils/mkgraph.sh data/lang exp/tri3c exp/tri3c/graph
utils/mkgraph.sh data/lang_ug exp/tri3c exp/tri3c/graph_ug
steps/decode_raw_fmllr.sh --config conf/decode.config --nj 20 --cmd "$decode_cmd" \
exp/tri3c/graph data/test exp/tri3c/decode
steps/decode_raw_fmllr.sh --config conf/decode.config --nj 20 --cmd "$decode_cmd" \
exp/tri3c/graph_ug data/test exp/tri3c/decode_ug
steps/decode_raw_fmllr.sh --use-normal-fmllr true --config conf/decode.config --nj 20 --cmd "$decode_cmd" \
exp/tri3c/graph data/test exp/tri3c/decode_2fmllr
steps/decode_raw_fmllr.sh --use-normal-fmllr true --config conf/decode.config --nj 20 --cmd "$decode_cmd" \
exp/tri3c/graph_ug data/test exp/tri3c/decode_2fmllr_ug
steps/align_raw_fmllr.sh --nj 8 --cmd "$train_cmd" data/train data/lang exp/tri3c exp/tri3c_ali
## SGMM on top of LDA+MLLT+SAT features.
## No-- this wasn't working because scripts don't support raw-fMLLR.
if [ ! -f exp/ubm4c/final.mdl ]; then
steps/train_ubm.sh --silence-weight 0.5 --cmd "$train_cmd" 400 data/train data/lang exp/tri3c_ali exp/ubm4c || exit 1;
fi
steps/train_sgmm2.sh --cmd "$train_cmd" 5000 7000 data/train data/lang exp/tri3c_ali exp/ubm4c/final.ubm exp/sgmm2_4c || exit 1;
utils/mkgraph.sh data/lang exp/sgmm2_4c exp/sgmm2_4c/graph || exit 1;
utils/mkgraph.sh data/lang_ug exp/sgmm2_4c exp/sgmm2_4c/graph_ug || exit 1;
steps/decode_sgmm2.sh --config conf/decode.config --nj 20 --cmd "$decode_cmd" \
--transform-dir exp/tri3c/decode exp/sgmm2_4c/graph data/test exp/sgmm2_4c/decode || exit 1;
steps/decode_sgmm2.sh --config conf/decode.config --nj 20 --cmd "$decode_cmd" \
--transform-dir exp/tri3c/decode_ug exp/sgmm2_4c/graph_ug data/test exp/sgmm2_4c/decode_ug || exit 1;
steps/decode_sgmm2.sh --use-fmllr true --config conf/decode.config --nj 20 --cmd "$decode_cmd" \
--transform-dir exp/tri3c/decode exp/sgmm2_4c/graph data/test exp/sgmm2_4c/decode_fmllr || exit 1;
( # Get scaled-by-30 versions of the speaker vectors, to be used for nnet training.
mkdir -p exp/sgmm2_4c_x30
cat exp/sgmm2_4c/vecs.* | copy-vector ark:- ark,t:- | \
awk -v scale=30.0 '{printf("%s [ ", $1); for (n=3;n<NF;n++) { printf("%f ", scale*$n); } print "]"; }' > exp/sgmm2_4c_x30/vecs.1
mkdir -p exp/sgmm2_4c_x30/decode
cat exp/sgmm2_4c/decode/vecs.* | copy-vector ark:- ark,t:- | \
awk -v scale=30.0 '{printf("%s [ ", $1); for (n=3;n<NF;n++) { printf("%f ", scale*$n); } print "]"; }' > exp/sgmm2_4c_x30/decode/vecs.1
mkdir -p exp/sgmm2_4c_x30/decode_ug
cat exp/sgmm2_4c/decode_ug/vecs.* | copy-vector ark:- ark,t:- | \
awk -v scale=30.0 '{printf("%s [ ", $1); for (n=3;n<NF;n++) { printf("%f ", scale*$n); } print "]"; }' > exp/sgmm2_4c_x30/decode_ug/vecs.1
)
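# (Format note: the awk above relies on copy-vector's text output, one vector
# per line, e.g. "utt1  [ 0.31 -0.92 1.05 ]"; field $2 is "[" and $NF is "]",
# so the loop over fields 3..NF-1 scales just the values.)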
exit 0;
##
steps/decode_sgmm2.sh --config conf/decode.config --nj 20 --cmd "$decode_cmd" \
exp/sgmm2_4c.no_transform/graph data/test exp/sgmm2_4c.no_transform/decode || exit 1;
steps/decode_sgmm2.sh --use-fmllr true --config conf/decode.config --nj 20 --cmd "$decode_cmd" \
exp/sgmm2_4c.no_transform/graph data/test exp/sgmm2_4c.no_transform/decode_fmllr || exit 1;
#!/bin/bash
# Multilingual setup for SGMMs.
# Caution: this is just a stub, intended to show others what to do; it
# is not functional yet.
# We treat the WSJ setup as the "other language"-- in fact it's the same language,
# of course, but we treat the phones there as a distinct set.
# The only important thing is that the WSJ data has the same sample rate as the
# RM data.
# add the prefix to all the words and phones:
mkdir -p data_ml exp_ml # ml stands for "multilingual"
utils/add_lang_prefix.sh data/lang rm: data_ml/lang_rm
utils/add_lang_prefix.sh ../../wsj/s5/data/lang wsj: data_ml/lang_wsj
# add the prefix to all the words, utterance-ids, and speaker-ids.
utils/add_data_prefix.sh data/train rm: data_ml/train_rm
utils/add_data_prefix.sh ../../wsj/s5/data/train_si284 wsj: data_ml/train_si284_wsj
# Merge the "lang" directories. This will change the phones.txt and words.txt,
# to incorporate all the symbols in the original setups.
utils/merge_lang.sh data_ml/lang_rm data_ml/lang_wsj data_ml/lang
utils/combine_data.sh data_ml/train_rm data_ml/train_si284_wsj data_ml/train
# The call to utils/convert_models.sh below will
# convert the RM LDA+MLLT system to use the new "lang" directory.
# This script converts the models in the directory to use the new integer values
# for the phones, as in data_ml/lang.
# Everything else will be copied. The only thing changed in the models is
# the transition-ids. We'll need a program call like
# gmm-convert <phone-map-file> <model-in> <model-out>
# where each line of the phone-map file has two fields: (phone-in phone-out).
# This will just affect the transition model, by mapping all the phone-ids.
# We'll also need a program
# convert-tree <phone-map-file> <tree-in> <tree-out>
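# For example (hypothetical contents; the real integer ids would come from the
# two phones.txt files), a phone-map file might look like:
#   1 1
#   2 45
#   3 46
# i.e. each source phone-id paired with its id in the merged phones.txt.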
utils/convert_models.sh exp/tri2b data_ml/lang_rm exp_ml/tri2b_rm data_ml/lang
utils/convert_models.sh ../../wsj/s5/exp/tri4b data_ml/lang_wsj exp_ml/tri4b_wsj data_ml/lang
# Re-do the alignment of the RM tri2b setup with the converted models
# (this avoids the hassle of converting the alignment.)
steps/align_si.sh --nj 8 --cmd "$train_cmd" data_ml/train_rm data_ml/lang exp_ml/tri2b_rm \
exp_ml/tri2b_rm_ali || exit 1;
# Now, starting from those alignments train an RM system with the same LDA+MLLT
# matrix as the WSJ system. The training script takes this from the alignment directory,
# so it's sufficient to put it there:
cp exp_ml/tri4b_wsj/final.mat exp_ml/tri2b_rm_ali/final.mat
# Train an LDA+MLLT+SAT system for RM that uses the same LDA+MLLT transforms as for WSJ.
steps/train_sat.sh 1800 9000 data_ml/train_rm data_ml/lang exp_ml/tri2b_rm_ali exp_ml/tri3b_rm || exit 1;
# Now merge the RM and WSJ models. This will create trees and transition-models
# that handle the two (disjoint) sets of phones that the RM and WSJ models
# contain. We'll need a program "merge-tree" and a program "gmm-merge". The
# "merge-tree" program will need, for each tree, a record of which sets of
# phones it was supposed to handle, since this is not recorded in the tree
# itself-- we can get this from the transition models, which do record it.
# Probably the "merge-tree" program will have the usage:
# merge-tree <tree1> <phone-set-1> <tree2> <phone-set-2> ... <tree-out>
# where the phone-set-n's will probably be filenames that contain lists of
# the phones.
# The "gmm-merge" program will have the usage:
# gmm-merge <model1> <model2> ... <model-out>
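# A hypothetical invocation, assuming per-language phone lists have been
# dumped to text files, might be:
#   merge-tree exp_ml/tri3b_rm/tree data_ml/phones_rm.txt \
#     exp_ml/tri4b_wsj/tree data_ml/phones_wsj.txt exp_ml/tri4b/tree
#   gmm-merge exp_ml/tri3b_rm/final.mdl exp_ml/tri4b_wsj/final.mdl \
#     exp_ml/tri4b/final.mdl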
steps/merge_models.sh exp_ml/tri3b_rm exp_ml/tri4b_wsj exp_ml/tri4b
steps/align_fmllr.sh --nj 32 --cmd "$train_cmd" data_ml/train data_ml/lang exp_ml/tri4b \
exp_ml/tri4b_ali || exit 1;
steps/train_ubm.sh --silence-weight 0.5 --cmd "$train_cmd" 600 \
data_ml/train data_ml/lang exp_ml/tri4b_ali exp_ml/ubm5a || exit 1;
# Use slightly larger SGMM parameters than the WSJ setup.
steps/train_sgmm2.sh --cmd "$train_cmd" \
15000 30000 data_ml/train data_ml/lang exp_ml/tri4b_ali \
exp_ml/ubm5a/final.ubm exp_ml/sgmm2_5a || exit 1;
# This convert_models.sh call will also have the effect of subsetting
# the model, because some of the phones are undefined in the destination.
# We should make sure that the programs "gmm-convert" and "convert-tree"
# accept a phone map that does not cover all of the phones we have-- the
# unmapped phones would be deleted. The --reduce option to the script would
# be passed into those programs, to confirm that that's "really" what we
# want to do.
utils/convert_models.sh --reduce true exp_ml/sgmm2_5a data_ml/lang exp/sgmm2_5c_ml data/lang
(
utils/mkgraph.sh data/lang_test_tgpr exp/sgmm2_5c_ml exp/sgmm2_5c_ml/graph_tgpr
steps/decode_sgmm2.sh --nj 10 --cmd "$decode_cmd" --transform-dir exp/tri4b/decode_tgpr_dev93 \
exp/sgmm2_5c_ml/graph_tgpr data/test_dev93 exp/sgmm2_5c_ml/decode_tgpr_dev93
steps/decode_sgmm2.sh --nj 8 --cmd "$decode_cmd" --transform-dir exp/tri4b/decode_tgpr_eval92 \
exp/sgmm2_5c_ml/graph_tgpr data/test_eval92 exp/sgmm2_5c_ml/decode_tgpr_eval92
utils/mkgraph.sh data/lang_test_bd_tgpr exp/sgmm2_5c_ml exp/sgmm2_5c_ml/graph_bd_tgpr || exit 1;
steps/decode_sgmm2.sh --nj 10 --cmd "$decode_cmd" --transform-dir exp/tri4b/decode_bd_tgpr_dev93 \
exp/sgmm2_5c_ml/graph_bd_tgpr data/test_dev93 exp/sgmm2_5c_ml/decode_bd_tgpr_dev93
steps/decode_sgmm2.sh --nj 8 --cmd "$decode_cmd" --transform-dir exp/tri4b/decode_bd_tgpr_eval92 \
exp/sgmm2_5c_ml/graph_bd_tgpr data/test_eval92 exp/sgmm2_5c_ml/decode_bd_tgpr_eval92
) &
@@ -13,7 +13,9 @@ local/rm_data_prep.sh /export/corpora5/LDC/LDC93S3A/rm_comp || exit 1;
utils/prepare_lang.sh data/local/dict '!SIL' data/local/lang data/lang || exit 1;
local/rm_prepare_grammar.sh || exit 1;
local/rm_prepare_grammar.sh || exit 1; # Traditional RM grammar (bigram word-pair)
local/rm_prepare_grammar_ug.sh || exit 1; # Unigram grammar (gives worse results, but
# changes in WER will be more significant.)
# mfccdir should be some place with a largish disk where you
# want to store MFCC features.
@@ -117,6 +119,11 @@ utils/mkgraph.sh data/lang exp/tri3b exp/tri3b/graph || exit 1;
steps/decode_fmllr.sh --config conf/decode.config --nj 20 --cmd "$decode_cmd" \
exp/tri3b/graph data/test exp/tri3b/decode || exit 1;
(
utils/mkgraph.sh data/lang_ug exp/tri3b exp/tri3b/graph_ug || exit 1;
steps/decode_fmllr.sh --config conf/decode.config --nj 20 --cmd "$decode_cmd" \
exp/tri3b/graph_ug data/test exp/tri3b/decode_ug || exit 1;
)
# Align all data with LDA+MLLT+SAT system (tri3b)
@@ -174,10 +181,10 @@ done
# Demo of "raw fMLLR"
# local/run_raw_fmllr.sh
# You don't have to run all 3 of the below, e.g. you can just run the run_sgmm2x.sh
local/run_sgmm.sh
# You don't have to run all 3 of the below, e.g. you can just run the run_sgmm2.sh
#local/run_sgmm.sh
local/run_sgmm2.sh
local/run_sgmm2x.sh
#local/run_sgmm2x.sh
# you can do:
# local/run_nnet_cpu.sh
@@ -3,7 +3,7 @@
. cmd.sh
(
steps/train_nnet_cpu.sh \
steps/train_nnet_cpu.sh --stage 28 \
--mix-up 8000 \
--initial-learning-rate 0.01 --final-learning-rate 0.001 \
--num-jobs-nnet 16 --num-hidden-layers 4 \
@@ -53,4 +53,4 @@
# 0.1% worse on train_dev, 0.1% better on eval2000.
#exp/nnet6b/decode_train_dev/wer_9:%WER 24.94 [ 12087 / 48460, 1769 ins, 2788 del, 7530 sub ]
#exp/nnet6b/decode_eval2000/score_10/eval2000.ctm.filt.sys: | Sum/Avg | 4459 42989 | 77.2 15.9 6.9 2.7 25.5 62.4 |
\ No newline at end of file
#exp/nnet6b/decode_eval2000/score_10/eval2000.ctm.filt.sys: | Sum/Avg | 4459 42989 | 77.2 15.9 6.9 2.7 25.5 62.4 |
#!/bin/bash
# Warning-- this recipe is deprecated. See ../s5b/ for the latest recipe.
. cmd.sh
exit 1;
@@ -8,7 +10,6 @@ exit 1;
# Caution: some of the graph creation steps use quite a bit of memory, so you
# should run this on a machine that has sufficient memory.
# Data prep
#local/swbd_p1_data_prep.sh /mnt/matylda2/data/SWITCHBOARD_1R2
@@ -33,7 +34,7 @@ local/eval2000_data_prep.sh /data/corpora0/LDC2002S09/hub5e_00 /data/corpora0/L
. cmd.sh
# mfccdir should be some place with a largish disk where you
# want to store MFCC features.
mfccdir=mfcc
mfccdir=`pwd`/mfcc
steps/make_mfcc.sh --nj 20 --cmd "$train_cmd" data/train exp/make_mfcc/train $mfccdir || exit 1;
# Don't do "|| exit 1" because actually some speakers don't have data,
@@ -16,7 +16,6 @@
. ./path.sh ## Source the tools/utils (import the queue.pl)
###
### We save the fMLLR features, so we can train on them easily
###
@@ -18,18 +18,17 @@
exp/tri4b/graph_bd_tgpr data/test_dev93 exp/nnet5c1/decode_bd_tgpr_dev93
steps/decode_nnet_cpu.sh --cmd "$decode_cmd" --nj 8 \
--transform-dir exp/tri4b/decode_bd_tgpr_dev93 \
exp/tri4b/graph_bd_tgpr data/test_dev93 exp/nnet5c1/decode_bd_tgpr_dev93
--transform-dir exp/tri4b/decode_bd_tgpr_eval92 \
exp/tri4b/graph_bd_tgpr data/test_eval92 exp/nnet5c1/decode_bd_tgpr_eval92
)
(
steps/train_nnet_cpu_mmi.sh --boost 0.1 --initial-learning-rate 0.001 \
--minibatch-size 128 --cmd "$decode_cmd" --transform-dir exp/tri4b_ali_si284 \
data/train data/lang exp/tri5c1_nnet exp/tri5c1_nnet exp/tri5c1_denlats exp/tri5c1_mmi_a
# (
# steps/train_nnet_cpu_mmi.sh --boost 0.1 --initial-learning-rate 0.001 \
# --minibatch-size 128 --cmd "$decode_cmd" --transform-dir exp/tri4b_ali_si284 \
# data/train data/lang exp/tri5c1_nnet exp/tri5c1_nnet exp/tri5c1_denlats exp/tri5c1_mmi_a
steps/decode_nnet_cpu.sh --cmd "$decode_cmd" --nj 20 \
--transform-dir exp/tri3b/decode \
exp/tri3b/graph data/test exp/tri5c1_mmi_a/decode
)&
# steps/decode_nnet_cpu.sh --cmd "$decode_cmd" --nj 20 \
# --transform-dir exp/tri3b/decode \
# exp/tri3b/graph data/test exp/tri5c1_mmi_a/decode
# )&
@@ -44,6 +44,19 @@ steps/decode_raw_fmllr.sh --cmd "$decode_cmd" --nj 8 exp/tri3c/graph_bd_tgpr \
data/test_dev93 exp/tri3c/decode_bd_tgpr_dev93
)&
steps/align_fmllr.sh --nj 20 --cmd "$train_cmd" \
data/train_si284 data/lang exp/tri3c exp/tri3c_ali_si284 || exit 1;
steps/train_raw_sat.sh --cmd "$train_cmd" \
4200 40000 data/train_si284 data/lang exp/tri3c_ali_si284 exp/tri4d || exit 1;
(
utils/mkgraph.sh data/lang_test_tgpr exp/tri4d exp/tri4d/graph_tgpr || exit 1;
steps/decode_raw_fmllr.sh --nj 10 --cmd "$decode_cmd" \
exp/tri4d/graph_tgpr data/test_dev93 exp/tri4d/decode_tgpr_dev93 || exit 1;
steps/decode_raw_fmllr.sh --nj 8 --cmd "$decode_cmd" \
exp/tri4d/graph_tgpr data/test_eval92 exp/tri4d/decode_tgpr_eval92 || exit 1;
) &
wait
@@ -68,7 +68,7 @@ echo "$0: feature type is $feat_type"
case $feat_type in
delta) sifeats="ark,s,cs:apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas ark:- ark:- |";;
lda) sifeats="ark,s,cs:apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $srcdir/final.mat ark:- ark:- |"
cp $srcdir/final.mat $dir
cp $srcdir/final.mat $srcdir/full.mat $dir
;;
*) echo "Invalid feature type $feat_type" && exit 1;
esac
@@ -25,8 +25,8 @@ echo "$0 $@" # Print the command line for logging
. parse_options.sh || exit 1;
if [ $# != 4 ]; then
echo "usage: steps/align_si.sh <data-dir> <lang-dir> <src-dir> <align-dir>"
echo "e.g.: steps/align_si.sh data/train data/lang exp/tri1 exp/tri1_ali"
echo "usage: $0 <data-dir> <lang-dir> <src-dir> <align-dir>"
echo "e.g.: $0 data/train data/lang exp/tri1 exp/tri1_ali"
echo "main options (for others, see top of script file)"
echo " --config <config-file> # config containing options"
echo " --nj <nj> # number of parallel jobs"
@@ -82,10 +82,17 @@ case $feat_type in
esac
if [ ! -z "$transform_dir" ]; then
echo "$0: using transforms from $transform_dir"
[ ! -f $transform_dir/trans.1 ] && echo "$0: no such file $transform_dir/trans.1" && exit 1;
[ "$nj" -ne "`cat $transform_dir/num_jobs`" ] \
&& echo "$0: #jobs mismatch with transform-dir." && exit 1;
feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark,s,cs:$transform_dir/trans.JOB ark:- ark:- |"
if [ "$feat_type" == "raw" ]; then
[ ! -f $transform_dir/raw_trans.1 ] && echo "$0: no such file $transform_dir/raw_trans.1" && exit 1;
[ "$nj" -ne "`cat $transform_dir/num_jobs`" ] \
&& echo "$0: #jobs mismatch with transform-dir." && exit 1;
feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark,s,cs:$transform_dir/raw_trans.JOB ark:- ark:- |"
else
[ ! -f $transform_dir/trans.1 ] && echo "$0: no such file $transform_dir/trans.1" && exit 1;
[ "$nj" -ne "`cat $transform_dir/num_jobs`" ] \
&& echo "$0: #jobs mismatch with transform-dir." && exit 1;
feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark,s,cs:$transform_dir/trans.JOB ark:- ark:- |"
fi
elif grep 'transform-feats --utt2spk' $srcdir/log/train.1.log >&/dev/null; then
echo "$0: **WARNING**: you seem to be using a neural net system trained with transforms,"
echo " but you are not providing the --transform-dir option in test time."
@@ -87,11 +87,17 @@
*) echo "$0: invalid feature type $feat_type" && exit 1;
esac
if [ ! -z "$transform_dir" ]; then
echo "$0: using transforms from $transform_dir"
[ ! -f $transform_dir/trans.1 ] && echo "$0: no such file $transform_dir/trans.1" && exit 1;
[ "$nj" -ne "`cat $transform_dir/num_jobs`" ] \
&& echo "$0: #jobs mismatch with transform-dir." && exit 1;
feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark,s,cs:$transform_dir/trans.JOB ark:- ark:- |"
if [ -f $transform_dir/trans.1 ]; then
echo "$0: using transforms from $transform_dir"
feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark,s,cs:$transform_dir/trans.JOB ark:- ark:- |"
elif [ -f $transform_dir/raw_trans.1 ]; then