Commit 24bd1bef authored by Abdelwahab HEBA's avatar Abdelwahab HEBA
Browse files

clean scoring & add noise labelisation

parent dc5d4274
......@@ -45119,6 +45119,7 @@ hdw aa ch dd ei dd ou bb ll vv ei
hdz aa ch dd ai zz ai dd
hdz(2) aa ch dd ei zz ai dd
he ee
he' ee
head ii dd
health ai ll ff
hearst oe rr ss tt
......@@ -102183,6 +102184,7 @@ zülle zz uu ll
âpreté aa pp rr ee tt ei
âtre aa tt rr
âtre(2) aa tt rr ee
ç ss
ça ss aa
çà ss aa
çà_et_là ss aa ei ll aa
......@@ -43,7 +43,8 @@ trap "rm -r $tmpdir" EXIT
mkdir -p $tmpdir
#for lm_suffix in tgsmall tgmed tglarge fglarge; do
for lm_suffix in tglarge french-small tgmix; do
for lm_suffix in tglarge french-small; do
#for lm_suffix in linto1; do
# tglarge is prepared by a separate command, called from run.sh; we don't
# want to compile G.fst for tglarge, as it takes a while.
test=${src_dir}_test_${lm_suffix}
......
......@@ -253,7 +253,7 @@ if __name__=="__main__":
#print("Je rentre dans has_attrib_speaker et element.tail not null")
#print(str(Element.tag))
#print(str(Element.tail))
if Element.tag=="Sync" or Element.tag=="Background":
if Element.tag=="Sync":
#print("Je rentre Sync+Background"+ text +"| et le next c'est "+ Element.tail)
#print(Element.tag+" "+Element.tail)
Time_start_current_sync=Element.get('time')
......
......@@ -263,7 +263,7 @@ if __name__=="__main__":
#print("Je rentre dans has_attrib_speaker et element.tail not null")
#print(str(Element.tag))
#print(str(Element.tail))
if Element.tag=="Sync" or Element.tag=="Background":
if Element.tag=="Sync":
#print("Je rentre Sync+Background"+ text +"| et le next c'est "+ Element.tail)
#print(Element.tag+" "+Element.tail)
Time_start_current_sync=Element.get('time')
......@@ -300,27 +300,27 @@ if __name__=="__main__":
# if Element.get('type')=='noise':
# ===== Respiration
if Element.get('desc')=='r' or Element.get('desc')=='i' or Element.get('desc')=='e' or Element.get('desc')=='n':
text=text+" "+Element.tail.replace('\n', '')
text=text+" <breath> "+Element.tail.replace('\n', '')
elif Element.get('desc')=='pf':
text=text+" "+Element.tail.replace('\n', '')
text=text+" <blowshard> "+Element.tail.replace('\n', '')
# ===== Bruits bouches
elif Element.get('desc')=='tx':
text=text+" "+Element.tail.replace('\n', '')
text=text+" <cough> "+Element.tail.replace('\n', '')
elif Element.get('desc')=='bg':
text=text+" "+Element.tail.replace('\n', '')
text=text+" <glottisblow> "+Element.tail.replace('\n', '')
elif Element.get('desc')=='bb':
text=text+" "+Element.tail.replace('\n', '')
text=text+" <noisemouth> "+Element.tail.replace('\n', '')
elif Element.get('desc')=='rire':
text=text+" "+Element.tail.replace('\n', '')
text=text+" <laugh> "+Element.tail.replace('\n', '')
elif Element.get('desc')=='sif':
text=text+" "+Element.tail.replace('\n', '')
text=text+" <whistling> "+Element.tail.replace('\n', '')
elif Element.get('desc')=='ch' or Element.get('desc')=='ch-':
text=text+" "+Element.tail.replace('\n', '')
text=text+" <whisperedvoice> "+Element.tail.replace('\n', '')
# ====== Bruit exterieus a l'acte de parole
elif Element.get('desc')=='b' or Element.get('desc')=='pap' or Element.get('desc')=='mic' or Element.get('desc')=='conv':
text=text+" "+Element.tail.replace('\n', '')
text=text+" <noise> "+Element.tail.replace('\n', '')
elif Element.get('desc')=='top':
text=text+" "+Element.tail.replace('\n', '')
text=text+" <top> "+Element.tail.replace('\n', '')
# "pi" intellegible "pif" inaudible voir doc transcriber
#elif Element.get('type')=='pronounce':
# text=text+" "+Element.tail.replace('\n', '')
......
......@@ -117,6 +117,7 @@ if [ $stage -le 3 ]; then
echo "Preparing phone lists and clustering questions"
(echo SIL; echo SPN; echo NSN; echo LAU;) > $silence_phones
#(echo SIL; echo SPN;) > $silence_phones
echo SIL > $optional_silence
# nonsilence phones; on each line is a list of phones that correspond
# really to the same base phone.
......@@ -140,12 +141,14 @@ if [ $stage -le 3 ]; then
fi
if [ $stage -le 4 ]; then
# TCOF
#(echo '!sil SIL'; echo '<spoken_noise> SPN'; echo '<UNK> SPN'; echo '<laugh> LAU'; echo '<noise> NSN') |\
#(echo '<unk> SPN'; echo '<laugh> LAU'; echo '<noise> NSN'; echo '<top> NSN';\
# echo '<whispered_voice> NSN'; echo '<breath> SPN'; echo '<blows_hard> NSN'; echo '<cough> SPN'; echo '<glottis_blow> SPN';\
# echo '<noise_mouth> SPN';echo '<whistling> NSN' ) |\
# ESTER
(echo '<unk> SPN'; echo '<laugh> LAU'; echo '<noise> NSN'; echo '<top> NSN';\
echo '<whisperedvoice> NSN'; echo '<noisemouth>' SPN; echo '<breath> SPN'; echo '<blowshard> NSN'; echo '<cough> SPN'; echo '<glottisblow> SPN';\
echo '<noisemouth> SPN';echo '<whistling> NSN') |\
# ESTER without noise states
(echo '!sil SIL') |\
#(echo '!sil SIL'; echo '<UNK> SPN') |\
cat - $lexicon_raw_nosil | sort | uniq >$dst_dir/lexicon.txt
echo "Lexicon text file saved as: $dst_dir/lexicon.txt"
fi
......
......@@ -42,11 +42,11 @@ done
mkdir -p $dir/scoring/log
# A changer suivant les balises utilisees dans la normalisation
# TCOF
cat $data/text | sed 's:<noise>::g' | sed 's:<spoken_noise>::g' | sed 's:<laugh>::g' > $dir/scoring/test_filt.txt
#cat $data/text | sed 's:<noise>::g' | sed 's:<spoken_noise>::g' | sed 's:<laugh>::g' > $dir/scoring/test_filt.txt
# ESTER
#cat $data/text | sed 's:<noise>::g' | sed 's:<breath>::g' | sed 's:<laugh>::g' |\
# sed 's:<blows_hard>::g' | sed 's:<cough>::g' | sed 's:<glottis_blow>::g' | sed 's:<noise_mouth>::g' |\
# sed 's:<whistling>::g' | sed 's:<whispered_voice>::g' | sed 's:<top>::g' > $dir/scoring/test_filt.txt
cat $data/text | sed 's:<noise>::g' | sed 's:<breath>::g' | sed 's:<laugh>::g' |\
sed 's:<blowshard>::g' | sed 's:<cough>::g' | sed 's:<glottisblow>::g' | sed 's:<noisemouth>::g' |\
sed 's:<whistling>::g' | sed 's:<whisperedvoice>::g' | sed 's:<top>::g' | sed 's:<breath>::g' > $dir/scoring/test_filt.txt
for wip in $(echo $word_ins_penalty | sed 's/,/ /g'); do
$cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/best_path.LMWT.$wip.log \
lattice-scale --inv-acoustic-scale=LMWT "ark:gunzip -c $dir/lat.*.gz|" ark:- \| \
......
S#!/bin/bash
#!/bin/bash
# Copyright 2017 Abdel HEBA @Linagora
# Pense a ajouter utils/fix_data_dir.sh data/test to fix utterance error
# Running on Koios J=12
# data dir
#
. ./cmd.sh
. ./path.sh
idata_kaldi=data-microsoft-mfcc
exp_kaldi=exp-microsoft-mfcc
# you might not want to do this for interactive shells.
#set -e
# format the data as Kaldi data directories
#train dev
# TCOF
# Data prepare: TCOF - ESTER
# TCOF:
idata_kaldi=data-microsoft-mfcc
exp_kaldi=exp-microsoft-mfcc
data=/home/lingora/Documents/Linagora/Data/Tcof/tcof/3/Corpus
LM_train_text=/home/lingora/Documents/Linagora/Data/Tcof/tcof/3/Corpus/train
for part in meeting_best_microsoft meeting_test; do
# ESTER:
data=/home/lingora/Documents/Linagora/Data/ESTER/Corpus/ESTER/DGA/Phase1
idata_kaldi=data-ESTER-V2-noise
exp_kaldi=exp-ESTER-V2-noise
for part in data; do
# use underscore-separated names in data directories.
echo "prepare $part"
local/data_prepTCOF.sh $data/$part $idata_kaldi/$part
#local/data_prepTCOF.sh $data/$part $idata_kaldi/$part
# probleme event (URL:)
local/data_prepESTER.sh $data/$part $idata_kaldi/$part
done
# Evaluate SNR for each segment
evaluate_snr=eval-snr
mkdir eval-snr
for part in meeting_best_microsoft meeting_test; do
#evaluate_snr=eval-snr
#mkdir eval-snr
#for part in meeting_best_microsoft meeting_test; do
# echo "Evaluate $part"
# local/evaluation/evaluate_snr.sh $idata_kaldi/$part $evaluate_snr
#done
# Evaluate SNR for each segment of ESTER
evaluate_snr=eval-snr-ESTER/Eval2005
mkdir -p $evaluate_snr
for part in data; do
echo "Evaluate $part"
local/evaluation/evaluate_snr.sh $idata_kaldi/$part $evaluate_snr
done
......@@ -44,33 +53,44 @@ done
# ## well as some intermediate data(e.g. the normalized text used for LM training),
# ## are available for download at http://www.openslr.org/11/
# OOOOOOK Train_lm
# TCOF
#LM_train_text=/home/lingora/Documents/Linagora/Data/Tcof/tcof/3/Corpus/train
# ESTER
LM_train_text=/home/lingora/Documents/Linagora/Data/ESTER/Corpus/ESTER/DGA/Phase1/data
local/lm/train_lm.sh $LM_train_text \
$idata_kaldi/local/lm/norm/tmp $idata_kaldi/local/lm/norm/norm_texts $idata_kaldi/local/lm
# check characters:
# awk '{for(i=1;i<=NF;i++)if(!a[$i]++)print $i"\n"}' ORS= FS= $idata_kaldi/local/lm/meeting-vocab.txt | sort -b
# Learning Grapheme to phonem
## Optional G2P training scripts.
## As the LM training scripts above, this script is intended primarily to
## document our G2P model creation process
# OOOOOOk g2p
# OOOOOOk g2p: done
#local/g2p/train_g2p.sh cmu_dict data/local/lm
##### OOOOOOK
# # when "--stage 3" option is used below we skip the G2P steps, and use the
# # if lexicon are already downloaded from Elyes's works then Stage=3 else Stage=0
# print number of phonem used in french
cat cmu_dict/fr.dict | awk '{$1="";print $0}' | tr ' ' '\n' | sort -b | uniq -c
mkdir -p $idata_kaldi/local/dict/cmudict
cp cmu_dict/fr.dict $idata_kaldi/local/dict/fr.dict
#cp cmu_dict/fr.dict data/local/dict/cmudict
local/prepare_dict.sh --stage 3 --nj 1 --cmd "$train_cmd" \
$idata_kaldi/local/lm $idata_kaldi/local/lm $idata_kaldi/local/dict
mkdir -p $idata_kaldi/local/lm/g2p
cp g2p/model-5 $idata_kaldi/local/lm/g2p
###### Prepare dict: add words which doesn't exist in dictionnary + config files...
local/prepare_dict.sh --stage 0 --nj 4 --cmd "$train_cmd" \
$idata_kaldi/local/lm $idata_kaldi/local/lm/g2p $idata_kaldi/local/dict
###### OOOOOOK
utils/prepare_lang.sh $idata_kaldi/local/dict \
"<UNK>" $idata_kaldi/local/lang_tmp $idata_kaldi/lang
"<unk>" $idata_kaldi/local/lang_tmp $idata_kaldi/lang
export LC_ALL=fr_FR.UTF-8
###### OOOOOOK
local/format_lms.sh --src-dir $idata_kaldi/lang $idata_kaldi/local/lm
local/format_lms.sh --src-dir $idata_kaldi/lang $idata_kaldi/local/lm
# # Create ConstArpaLm format language model for full 3-gram and 4-gram LMs
#utils/build_const_arpa_lm.sh data/local/lm/lm_tglarge.arpa.gz \
......@@ -81,7 +101,8 @@ export LC_ALL=fr_FR.UTF-8
mfccdir=mfcc
plpdir=plp
fbankdir=fbank
for part in meeting_best_microsoft meeting_test; do
#for part in meeting_best_microsoft meeting_test; do
for part in data; do
#MFCC features
steps/make_mfcc.sh --cmd "$train_cmd" --nj 4 $idata_kaldi/$part $exp_kaldi/make_mfcc/$part $mfccdir
steps/compute_cmvn_stats.sh $idata_kaldi/$part $exp_kaldi/make_mfcc/$part $mfccdir
......@@ -92,23 +113,26 @@ for part in meeting_best_microsoft meeting_test; do
#steps/make_fbank.sh --cmd "$train_cmd" --nj 4 $idata_kaldi/$part $exp_kaldi/make_fbank/$part $fbankdir
#steps/compute_cmvn_stats.sh $idata_kaldi/$part $exp_kaldi/make_fbank/$part $fbankdir
done
utils/fix_data_dir.sh $idata_kaldi/data
utils/fix_data_dir.sh $idata_kaldi/meeting_best_microsoft
utils/fix_data_dir.sh $idata_kaldi/meeting_test
# # Make some small data subsets for early system-build stages. Note, there are 29k
# # utterances in the train_clean_100 directory which has 100 hours of data.
# # For the monophone stages we select the shortest utterances, which should make it
# # easier to align the data from a flat start.
#utils/subset_data_dir.sh --shortest $idata_kaldi/train 15000 $idata_kaldi/train_15kshort
#utils/subset_data_dir.sh --shortest $idata_kaldi/train 1000 $idata_kaldi/train_1kshort
#utils/subset_data_dir.sh --shortest $idata_kaldi/train 70000 $idata_kaldi/train_70kshort
#utils/subset_data_dir.sh $idata_kaldi/train 120000 $idata_kaldi/train_120k
#utils/subset_data_dir.sh data/train 120000 data/train_120k
utils/subset_data_dir.sh --shortest $idata_kaldi/data 1000 $idata_kaldi/data_1kshort
utils/subset_data_dir.sh --shortest $idata_kaldi/data 5000 $idata_kaldi/data_5kshort
utils/subset_data_dir.sh --shortest $idata_kaldi/data 10000 $idata_kaldi/data_10kshort
utils/subset_data_dir.sh --shortest $idata_kaldi/data 15000 $idata_kaldi/data_15kshort
utils/subset_data_dir.sh --shortest $idata_kaldi/data 15000 $idata_kaldi/data_15kshort
utils/subset_data_dir.sh $idata_kaldi/data 20000 $idata_kaldi/data_20k
utils/subset_data_dir.sh $idata_kaldi/data 25000 $idata_kaldi/data_25k
# # train a monophone system
exp_mono=$exp_kaldi/mono_selected_microsoft
exp_mono=$exp_kaldi/mono10K
steps/train_mono.sh --boost-silence 1.25 --nj 4 --cmd "$train_cmd" \
$idata_kaldi/meeting_best_microsoft $idata_kaldi/lang $exp_mono
$idata_kaldi/data_10kshort $idata_kaldi/lang $exp_mono
# OK Jusqu'au monophone
# =================================================
# =================================================
# Evaluate PER for each meeting in training set
......@@ -157,8 +181,8 @@ done
echo "Filename,%WER,%nbWER,ins,del,sub" > $dir_evaluation/evaluate_WER/WER_per_meeting.csv
for test in meeting_test; do
# Decode WER
steps/decode.sh --nj 2 --cmd "$decode_cmd" $exp_mono/graph_tglarge \
$idata_kaldi/$test $exp_mono/decode_tglarge_$test
steps/decode.sh --nj 4 --cmd "$decode_cmd" $exp_mono/graph_french-small \
$idata_kaldi/$test $exp_mono/decode_french-small_$test
# Evaluate WER for each meeting in $ test
# symtab=$exp_mono/graph_tglarge/words.txt
# find $data/$test -mindepth 1 -maxdepth 1 -type d > $dir_evaluation/meeting_in_$test.txt
......@@ -193,17 +217,27 @@ cat exp-eval/Evaluation/ppl_only/3gmixfrsmall_dev_test.csv | awk 'BEGIN{FS=",";O
paste -d , exp-eval/Evaluation/final_evaluation.csv exp-eval/Evaluation/evaluate_3gfrench-small.csv
paste -d , exp-eval/Evaluation/final_evaluation.csv exp-eval/Evaluation/lm_tg100h.csv \
> exp-eval/Evaluation/Final-eval/final_evaluation_lm_tg_100h.csv
steps/align_si.sh --boost-silence 1.25 --nj 5 --cmd "$train_cmd" \
data-valid/train_file data-valid/lang exp-valid/mono exp-valid/mono_ali
# =========================== TRIPHONE =======================
# Align data
steps/align_si.sh --boost-silence 1.25 --nj 4 --cmd "$train_cmd" \
$idata_kaldi/data_15kshort $idata_kaldi/lang $exp_mono $exp_kaldi/mono_ali15k_model10k
# # train a first delta + delta-delta triphone system on a subset of 70000 utterances
steps/train_deltas.sh --boost-silence 1.25 --cmd "$train_cmd" \
2000 10000 $idata_kaldi/meeting_best_microsoft $idata_kaldi/lang $exp_mono $exp_kaldi/tri1_selected
3000 15000 $idata_kaldi/data_15kshort $idata_kaldi/lang $exp_kaldi/mono_ali $exp_kaldi/tri1
steps/train_deltas.sh --boost-silence 1.25 --cmd "$train_cmd" \
3000 15000 $idata_kaldi/meeting_best_microsoft $idata_kaldi/lang $exp_kaldi/mono_ali $exp_kaldi/tri1_selected
steps/train_deltas.sh --boost-silence 1.25 --cmd "$train_cmd" \
2000 10000 $idata_kaldi/data_20kshort $idata_kaldi/lang_all exp-ESTER-all/mono_ali exp-ESTER-all/tri1_selected
# # decode using the tri1 model
# (
utils/mkgraph.sh $idata_kaldi/lang_test_tglarge \
$exp_kaldi/tri1_selected $exp_kaldi/tri1_selected/graph_tglarge
utils/mkgraph.sh $idata_kaldi/lang_test_french-small \
$exp_kaldi/tri1_selected $exp_kaldi/tri1_selected/graph_french-small
for test in meeting_test; do
steps/decode.sh --nj 2 --cmd "$decode_cmd" $exp_kaldi/tri1_selected/graph_tglarge \
$idata_kaldi/$test $exp_kaldi/tri1_selected/decode_tglarge_$test
......@@ -215,17 +249,20 @@ paste -d , exp-eval/Evaluation/final_evaluation.csv exp-eval/Evaluation/lm_tg100
done
# )&
steps/align_si.sh --nj 5 --cmd "$train_cmd" \
data-valid/train_file data-valid/lang exp-valid/tri1 exp-valid/tri1_ali
# ================== Transformation LDA+MLLT ============
utils/subset_data_dir.sh --shortest $idata_kaldi/data 27000 $idata_kaldi/data_27k
steps/align_si.sh --nj 4 --cmd "$train_cmd" \
$idata_kaldi/data_20k $idata_kaldi/lang $exp_kaldi/tri1 $exp_kaldi/tri1_ali
# # train an LDA+MLLT system.
steps/train_lda_mllt.sh --cmd "$train_cmd" \
--splice-opts "--left-context=3 --right-context=3" 2500 15000 \
$idata_kaldi/meeting_best_microsoft $idata_kaldi/lang $exp_kaldi/tri1_selected $exp_kaldi/tri2b_selected
$idata_kaldi/data_27k $idata_kaldi/lang $exp_kaldi/tri1_selected $exp_kaldi/tri2b_selected
# # decode using the LDA+MLLT model
(
# (
utils/mkgraph.sh $idata_kaldi/lang_test_tglarge \
$exp_kaldi/tri2b_selected $exp_kaldi/tri2b_selected/graph_tglarge
for test in meeting_test; do
......@@ -240,22 +277,26 @@ paste -d , exp-eval/Evaluation/final_evaluation.csv exp-eval/Evaluation/lm_tg100
# )&
# # Align a 10k utts subset using the tri2b model
# steps/align_si.sh --nj 10 --cmd "$train_cmd" --use-graphs true \
# data/train_10k data/lang_nosp exp/tri2b exp/tri2b_ali_10k
utils/subset_data_dir.sh --shortest $idata_kaldi/data 31000 $idata_kaldi/data_31k
steps/align_si.sh --nj 4 --cmd "$train_cmd" --use-graphs true \
$idata_kaldi/data_31k $idata_kaldi/lang $exp_kaldi/tri2b_selected $exp_kaldi/tri2b_ali
# # Train tri3b, which is LDA+MLLT+SAT on 10k utts
# steps/train_sat.sh --cmd "$train_cmd" 2500 15000 \
# data/train_10k data/lang_nosp exp/tri2b_ali_10k exp/tri3b
steps/train_sat.sh --cmd "$train_cmd" 2500 15000 \
$idata_kaldi/meeting_best_microsoft $idata_kaldi/lang $exp_kaldi/tri2b_selected $exp_kaldi/tri3b
steps/train_sat.sh --cmd "$train_cmd" 4500 45000 \
$idata_kaldi/data_31k $idata_kaldi/lang $exp_kaldi/tri2b_ali $exp_kaldi/tri3b
steps/train_sat.sh --cmd "$train_cmd" 2500 15000 \
data-microsoft-mfcc/meeting_best_microsoft data-ESTER/lang_new exp-ESTER/tri3b exp-ESTER/tri3b_bis
# # decode using the tri3b model
# (
utils/mkgraph.sh $idata_kaldi/lang_test_tglarge \
$exp_kaldi/tri3b $exp_kaldi/tri3b/graph_test_tglarge
utils/mkgraph.sh $idata_kaldi/lang_test_french-small \
$exp_kaldi/tri3b $exp_kaldi/tri3b/graph_test_french-small
for test in meeting_test; do
steps/decode_fmllr.sh --nj 2 --cmd "$decode_cmd" \
$exp_kaldi/tri3b/graph_test_tgsmall $idata_kaldi/$test \
$exp_kaldi/tri3b/decode_tglarge_$test
$exp_kaldi/tri3b/graph_test_french-small $idata_kaldi/$test \
$exp_kaldi/tri3b/decode_tgsphinx_$test
# steps/lmrescore.sh --cmd "$decode_cmd" data/lang_nosp_test_{tgsmall,tgmed} \
# data/$test exp/tri3b/decode_nosp_{tgsmall,tgmed}_$test
# steps/lmrescore_const_arpa.sh \
......@@ -297,14 +338,14 @@ paste -d , exp-eval/Evaluation/final_evaluation.csv exp-eval/Evaluation/lm_tg100
# # and re-create the lang directory.
# à comprendre
steps/get_prons.sh --cmd "$train_cmd" \
$idata_kaldi/meeting_best_microsoft $idata_kaldi/lang $exp_kaldi/tri3b
$idata_kaldi/data_31k $idata_kaldi/lang $exp_kaldi/tri3b
utils/dict_dir_add_pronprobs.sh --max-normalize true \
$idata_kaldi/local/dict \
$exp_kaldi/tri3b/pron_counts_nowb.txt $exp_kaldi/tri3b/sil_counts_nowb.txt \
$exp_kaldi/tri3b/pron_bigram_counts_nowb.txt $idata_kaldi/local/dict_new
utils/prepare_lang.sh $idata_kaldi/local/dict_new \
"<UNK>" $idata_kaldi/local/lang_tmp_new $idata_kaldi/lang_new
"<unk>" $idata_kaldi/local/lang_tmp_new $idata_kaldi/lang_new
local/format_lms.sh --src-dir $idata_kaldi/lang_new $idata_kaldi/local/lm
# utils/build_const_arpa_lm.sh \
......@@ -319,11 +360,11 @@ paste -d , exp-eval/Evaluation/final_evaluation.csv exp-eval/Evaluation/lm_tg100
#utils/mkgraph.sh \
# $idata_kaldi/lang_new_test_french-small $exp_kaldi/tri3b $exp_kaldi/tri3b/graph_french-small
utils/mkgraph.sh \
$idata_kaldi/lang_new_test_tgmix $exp_kaldi/tri3b $exp_kaldi/tri3b/graph_tgmix
$idata_kaldi/lang_new_test_french-small $exp_kaldi/tri3b $exp_kaldi/tri3b/graph_french-small
for test in meeting_test; do
steps/decode_fmllr.sh --nj 2 --cmd "$decode_cmd" \
$exp_kaldi/tri3b/graph_tgmix $idata_kaldi/$test \
$exp_kaldi/tri3b/decode_lang_new_tgmix_$test
steps/decode_fmllr.sh --nj 4 --cmd "$decode_cmd" \
$exp_kaldi/tri3b/graph_french-small $idata_kaldi/data_10kshort \
$exp_kaldi/tri3b/decode_lang_new_french-small_data_10kshort
# steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \
# data/$test exp/tri4b/decode_{tgsmall,tgmed}_$test
# steps/lmrescore_const_arpa.sh \
......@@ -355,10 +396,10 @@ paste -d , exp-eval/Evaluation/final_evaluation.csv exp-eval/Evaluation/lm_tg100
--minibatch-size "$minibatch_size" \
--num-jobs-nnet 4 --mix-up 8000 \
--initial-learning-rate 0.01 --final-learning-rate 0.001 \
--num-hidden-layers 4 \
--num-hidden-layers 3 \
--pnorm-input-dim 2000 --pnorm-output-dim 400 \
--cmd "$decode_cmd" \
$idata_kaldi/meeting_best_microsoft $idata_kaldi/lang_new $exp_kaldi/tri3b $exp_kaldi/nn2
$idata_kaldi/data $idata_kaldi/lang_new $exp_kaldi/tri3b $exp_kaldi/nn2
for test in meeting_test; do
......@@ -394,19 +435,36 @@ done
# # ... and then combine the two sets into a 460 hour one
# utils/combine_data.sh \
# data/train_clean_460 data/train_clean_100 data/train_clean_360
# Phase1(30H) & Phase 2(50H): 90H
utils/combine_data.sh \
$idata_kaldi/DATA_1_2 $idata_kaldi/data $idata_kaldi/DATA2
# # align the new, combined set, using the tri4b model
# steps/align_fmllr.sh --nj 40 --cmd "$train_cmd" \
# data/train_clean_460 data/lang exp/tri4b exp/tri4b_ali_clean_460
steps/align_fmllr.sh --nj 4 --cmd "$train_cmd" \
$idata_kaldi/DATA_1_2 $idata_kaldi/lang_new $exp_kaldi/tri3b $exp_kaldi/tri3b_ali_90
# # create a larger SAT model, trained on the 460 hours of data.
# steps/train_sat.sh --cmd "$train_cmd" 5000 100000 \
# data/train_clean_460 data/lang exp/tri4b_ali_clean_460 exp/tri5b
steps/train_sat.sh --cmd "$train_cmd" 5000 100000 \
$idata_kaldi/DATA_1_2 $idata_kaldi/lang_new $exp_kaldi/tri3b_ali_90 $exp_kaldi/tri5b
steps/train_sat.sh --cmd "$train_cmd" 7000 150000 \
$idata_kaldi/DATA_1_2 $idata_kaldi/lang_new $exp_kaldi/tri3b_ali_90 $exp_kaldi/tri5b
# # decode using the tri5b model
# (
# utils/mkgraph.sh data/lang_test_tgsmall \
# exp/tri5b exp/tri5b/graph_tgsmall
utils/mkgraph.sh $idata_kaldi/lang_new_test_french-small \
$exp_kaldi/tri5b $exp_kaldi/tri5b/graph_tgsmall
steps/decode_fmllr.sh --nj 4 --cmd "$decode_cmd" \
$exp_kaldi/tri5b/graph_tgsmall $idata_kaldi/DATA \
$exp_kaldi/tri5b/decode_french-small_EVAL2005
# for test in test_clean test_other dev_clean dev_other; do
# steps/decode_fmllr.sh --nj 20 --cmd "$decode_cmd" \
# exp/tri5b/graph_tgsmall data/$test \
......@@ -424,7 +482,24 @@ done
# # train a NN model on the 460 hour set
# local/nnet2/run_6a_clean_460.sh
#local/nnet2/run_6a_clean_460.sh
num_threads=4
parallel_opts="-pe smp $num_threads"
minibatch_size=128
steps/nnet2/train_pnorm_fast.sh --stage -10 \
--samples-per-iter 400000 \
--num-epochs 7 --num-epochs-extra 3 \
--parallel-opts "$parallel_opts" \
--num-threads "$num_threads" \
--minibatch-size "$minibatch_size" \
--num-jobs-nnet 4 --mix-up 10000 \
--initial-learning-rate 0.01 --final-learning-rate 0.001 \
--num-hidden-layers 4 \
--pnorm-input-dim 4000 --pnorm-output-dim 400 \
--cmd "$decode_cmd" \
$idata_kaldi/DATA_1_2 $idata_kaldi/lang_new $exp_kaldi/tri5b $exp_kaldi/nn90
# local/download_and_untar.sh $data $data_url train-other-500
# # prepare the 500 hour subset.
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment