Newer
Older
},
"outputs": [],
"source": [
"#!/bin/bash\n",
"\n",
"# Copyright 2017 Abdel HEBA @Linagora\n",
"# Remember to add utils/fix_data_dir.sh data/test to fix utterance errors\n",
"# Running on Koios J=12\n",
"#\n",
". cmd.sh\n",
". path.sh\n",
"# link utils & steps\n",
"#ln -s $KALDI_ROOT/egs/wsj/s5/utils\n",
"#ln -s $KALDI_ROOT/egs/wsj/s5/steps"
]
},
{
"cell_type": "markdown",
"metadata": {
"scrolled": false
},
"source": [
"# Link SpeechDatabase & Kaldi directories"
]
},
{
"cell_type": "code",
"#corpus_path=/home/abdelwah/Documents/STT/corpus/ESTER\n",
"corpus_path=/fast/LINAGORA/Corpus/\n",
"\n",
"idata_kaldi=data-ESTER-V4\n",
"exp_kaldi=exp-ESTER-V4"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Data preparation"
]
},
{
"cell_type": "code",
"execution_count": 57,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"prepare data\n",
"utils/data/get_utt2dur.sh: working out data-ESTER-V4/data/utt2dur from data-ESTER-V4/data/segments\n",
"utils/data/get_utt2dur.sh: computed data-ESTER-V4/data/utt2dur\n",
"utils/validate_data_dir.sh: Successfully validated data-directory data-ESTER-V4/data\n",
"Successfully prepared data in data-ESTER-V4/data..\n",
"prepare DATA2\n",
"utils/data/get_utt2dur.sh: working out data-ESTER-V4/DATA2/utt2dur from data-ESTER-V4/DATA2/segments\n",
"utils/data/get_utt2dur.sh: computed data-ESTER-V4/DATA2/utt2dur\n",
"utils/validate_data_dir.sh: Successfully validated data-directory data-ESTER-V4/DATA2\n",
"Successfully prepared data in data-ESTER-V4/DATA2..\n",
"prepare DATA\n",
"utils/data/get_utt2dur.sh: working out data-ESTER-V4/DATA/utt2dur from data-ESTER-V4/DATA/segments\n",
"utils/data/get_utt2dur.sh: computed data-ESTER-V4/DATA/utt2dur\n",
"utils/validate_data_dir.sh: Successfully validated data-directory data-ESTER-V4/DATA\n",
"Successfully prepared data in data-ESTER-V4/DATA..\n"
"# Dependencies\n",
"#pip3 install num2words --user\n",
"#pip3 install unidecode --user\n",
"# Prepare data input for kaldi processing\n",
"data=$corpus_path/Corpus/ESTER/DGA/Phase1\n",
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
"for part in data; do\n",
" # use underscore-separated names in data directories.\n",
" echo \"prepare $part\"\n",
" #local/data_prepTCOF.sh $data/$part $idata_kaldi/$part\n",
" # problem with event (URL:) tags\n",
" local/data_prepESTER.sh $data/$part $idata_kaldi/$part\n",
"done\n",
"\n",
"# Prepare data input for kaldi processing\n",
"data_phase2=$corpus_path/Corpus/ESTER/DGA/Phase2\n",
"for part in DATA2; do\n",
" # use underscore-separated names in data directories.\n",
" echo \"prepare $part\"\n",
" #local/data_prepTCOF.sh $data/$part $idata_kaldi/$part\n",
" # problem with event (URL:) tags\n",
" local/data_prepESTER.sh $data_phase2/$part $idata_kaldi/$part\n",
"done\n",
"\n",
"# ESTER DATA EVALUATION\n",
"data_test=$corpus_path/Corpus/ESTER/DGA/Eval2005\n",
"for part in DATA; do\n",
" # use underscore-separated names in data directories.\n",
" echo \"prepare $part\"\n",
" #local/data_prepTCOF.sh $data/$part $idata_kaldi/$part\n",
" # problem with event (URL:) tags\n",
" local/data_prepESTER.sh $data_test/$part $idata_kaldi/$part\n",
"done"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# SNR metric computed for each part of the corpus"
]
},
{
"cell_type": "code",
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"#@TODO: add SNR evaluation for each part of the corpus\n",
"# Evaluate SNR for each segment of ESTER\n",
"#evaluate_snr=eval-snr-ESTER/Eval2005\n",
"#mkdir -p $evaluate_snr\n",
"#for part in data; do\n",
"# echo \"Evaluate $part\"\n",
"# local/evaluation/evaluate_snr.sh $idata_kaldi/$part $evaluate_snr\n",
"#done\n",
"data=$corpus_path/Corpus/ESTER/DGA/Phase1\n",
"data_phase2=$corpus_path/Corpus/ESTER/DGA/Phase2\n",
"data_test=$corpus_path/Corpus/ESTER/DGA/Eval2005"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Build Language model from Text"
]
},
{
"cell_type": "code",
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Splitting into 2 parts, to allow for parallel processing ...\n",
"Checking the splits ...\n",
"Performing text normalization (2 jobs) - check data-ESTER-V4/local/lm/norm/tmp/txt_norm.JOB.log ...\n",
"Finished OK\n",
"Selecting the vocabulary (400000 words) ...\n",
"Making the corpus and the vocabulary ...\n",
"Word counts saved to 'data-ESTER-V4/local/lm/word_counts.txt'\n",
"Vocabulary saved as 'data-ESTER-V4/local/lm/meeting-vocab.txt'\n",
"All unique sentences (in sorted order) stored in 'data-ESTER-V4/local/lm/meeting-lm-norm.txt.gz'\n",
"Counting the total number word tokens in the corpus ...\n",
"There are 992954 tokens in the corpus\n",
"Training a 3-gram LM ...\n",
"This implementation assumes that you have a lot of free RAM(> 12GB) on your machine\n",
"If that's not the case, consider something like: http://joshua-decoder.org/4.0/large-lms.html\n",
"3,6M\tdata-ESTER-V4/local/lm/lm_tglarge.arpa.gz\n",
"Creating a 'small' pruned 3-gram LM (threshold: 0.0000003) ...\n",
"data-ESTER-V4/local/lm/lm_tglarge.arpa.gz: line 10: warning: non-zero probability for <unk> in closed-vocabulary LM\n",
"2,9M\tdata-ESTER-V4/local/lm/lm_tgsmall.arpa.gz\n",
"Creating a 'medium' pruned 3-gram LM (threshold: 0.0000001) ...\n",
"data-ESTER-V4/local/lm/lm_tglarge.arpa.gz: line 10: warning: non-zero probability for <unk> in closed-vocabulary LM\n",
"3,3M\tdata-ESTER-V4/local/lm/lm_tgmed.arpa.gz\n",
"4,4M\tdata-ESTER-V4/local/lm/lm_fglarge.arpa.gz\n"
]
}
],
"source": [
"# Build Language model\n",
"# Add pronunciation model for all phases 1 & 2\n",
"LM_train_text=$corpus_path/Corpus/Textall\n",
"local/lm/train_lm.sh $LM_train_text \\\n",
"$idata_kaldi/local/lm/norm/tmp $idata_kaldi/local/lm/norm/norm_texts $idata_kaldi/local/lm\n",
"# check characters:\n",
"# awk '{for(i=1;i<=NF;i++)if(!a[$i]++)print $i\"\\n\"}' ORS= FS= $idata_kaldi/local/lm/meeting-vocab.txt | sort -b"
]
},
{
"cell_type": "code",
"execution_count": 64,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"/fast/LINAGORA/tools/LM/lm_tgsphinx.arpa.gz: line 35: warning: non-zero probability for <unk> in closed-vocabulary LM\n",
"data-ESTER-V4/local/lm/lm_tglarge.arpa.gz: line 10: warning: non-zero probability for <unk> in closed-vocabulary LM\n",
"BOW numerator for context \"\" is -3.84557e-05 < 0\n",
"BOW numerator for context \"c' que\" is -0.00159251 < 0\n",
"reading 65199 1-grams\n",
"exp-ESTER-V4/eval_LM/mixed.gz: line 35: warning: non-zero probability for <unk> in closed-vocabulary LM\n",
"reading 18557872 2-grams\n",
"reading 23633745 3-grams\n"
]
}
],
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
"# Evaluate LM\n",
"testfile=$idata_kaldi/DATA/text_without_noise_tag\n",
"# LM's\n",
"LM_LIUM_LARGE=/fast/LINAGORA/tools/LM/lm_tgsphinx.arpa.gz\n",
"LM_LIUM_SMALL=/fast/LINAGORA/tools/LM/lm_french-small.arpa.gz\n",
"LM_ESTER_tglarge=$idata_kaldi/local/lm/lm_tglarge.arpa.gz\n",
"LM_ESTER_fglarge=$idata_kaldi/local/lm/lm_fglarge.arpa.gz\n",
"# dir eval language model\n",
"dir_eval_lm=$exp_kaldi/eval_LM\n",
"mkdir -p $dir_eval_lm\n",
"\n",
"# Get Text from kaldi text format\n",
"#cut -f2- -d' ' < $idata_kaldi/DATA/text | cut -d ' ' -f2- |\\\n",
"#sed -e 's/[ ]\\+/ /g' | sed -e 's/<[^ ][^ ]*>\\|!sil//g' > $testfile\n",
"# Lium 3-gram all\n",
"#ngram -lm $LM_LIUM_LARGE -ppl $testfile -debug 2 > $dir_eval_lm/LM_tgLium_full.ppl\n",
"# LIUM 3-gram pruned\n",
"#ngram -lm $LM_LIUM_SMALL -ppl $testfile -debug 2 > $dir_eval_lm/LM_tgLium_small.ppl\n",
"# ESTER 3-gram\n",
"#ngram -lm $LM_ESTER_tglarge -ppl $testfile -debug 2 > $dir_eval_lm/LM_tgESTER.ppl\n",
"# ESTER 4-gram\n",
"#ngram -lm $LM_ESTER_fglarge -ppl $testfile -debug 2 > $dir_eval_lm/LM_fgESTER.ppl\n",
"#===== Mix language models\n",
"# compute best lambda\n",
"#compute-best-mix $dir_eval_lm/LM_tgLium_full.ppl $dir_eval_lm/LM_tgESTER.ppl > $dir_eval_lm/best_mix\n",
"# mix language models\n",
"ngram -lm $LM_LIUM_LARGE -mix-lm $LM_ESTER_tglarge -lambda 0.714713 -write-lm $dir_eval_lm/mixed.gz\n",
"# compute perplexity\n",
"ngram -lm $dir_eval_lm/mixed.gz -ppl $testfile -debug 2 > $dir_eval_lm/LM_mixed.ppl"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"/fast/LINAGORA/tools/kaldi/tools/tmp2\n",
"Assertions.cc\t misc.pyc\t\tsetup.py\n",
"Assertions.hh\t mt.py\t\t\tSimpleGoodTuring.py\n",
"bin\t\t Multigram.cc\t\tSparseVector.pyx\n",
"build\t\t MultigramGraph.hh\tsymbols.py\n",
"CHANGES\t\t Multigram.hh\t\tsymbols.pyc\n",
"EditDistance.cc Obstack.hh\t\ttest-g2p.sh\n",
"Estimation.cc\t PriorityQueue.hh\ttest_LanguageModel.py\n",
"Evaluation.py\t Probability.hh\ttest_mGramCounts.py\n",
"Evaluation.pyc\t Python.hh\t\ttest_Minimization.py\n",
"fsa.py\t\t README\t\ttestProbability.cc\n",
"g2p.py\t\t ReferenceCounting.hh\ttest.py\n",
"Graph.cc\t SequenceModel.cc\ttest_SequenceModel.py\n",
"Graph.hh\t SequenceModel.hh\ttest_sequitur.py\n",
"groupedCounts.py SequenceModel.py\ttest_SparseVector.py\n",
"IterMap.py\t SequenceModel.pyc\ttool.py\n",
"LanguageModel.py sequitur.i\t\ttool.pyc\n",
"lib\t\t sequitur_.py\t\tTranslation.cc\n",
"LICENSE\t\t sequitur.py\t\tTypes.cc\n",
"Makefile\t sequitur_.pyc\t\tTypes.hh\n",
"makeOvModel.py\t sequitur.pyc\t\tUtility.cc\n",
"mGramCounts.py\t SequiturTool.py\tUtility.hh\n",
"Minimization.py SequiturTool.pyc\txmlwriter.py\n",
"Minimization.pyc sequitur_wrap.cpp\n",
"misc.py\t\t setup.cfg\n"
]
}
],
"source": [
"# Some of the steps above could be done as follows:\n",
"# Learning Grapheme to phonem from dictionary\n",
"#local/g2p/train_g2p.sh cmu_dict data/local/lm\n",
"#echo $SRILM_ROOT\n",
"#ls $KALDI_ROOT/tools/sequitur"
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" 105015 \n",
" 52871 aa\n",
" 1 aaq\n",
" 49015 ai\n",
" 13761 bb\n",
" 5354 ch\n",
" 20386 dd\n",
" 32293 ee\n",
" 22574 ei\n",
" 1816 eu\n",
" 11272 ff\n",
" 7160 in\n",
" 8405 jj\n",
" 29273 kk\n",
" 31389 ll\n",
" 21281 mm\n",
" 21994 nn\n",
" 2169 oe\n",
" 10989 on\n",
" 25794 oo\n",
" 7339 ou\n",
" 64179 rr\n",
" 38531 ss\n",
" 47034 tt\n",
" 182 un\n",
" 13873 uu\n",
" 1225 uy\n",
" 28250 zz\n",
"Downloading and preparing CMUdict\n",
"Autogenerating pronunciations for the words in data-ESTER-V4/local/dict/g2p/vocab_autogen.* ...\n",
"2883\n",
"2883\n",
"2883 pronunciations autogenerated OK\n",
"Combining the CMUdict pronunciations with the autogenerated ones ...\n",
"Combined lexicon saved to 'data-ESTER-V4/local/dict/lexicon_raw_nosil.txt'\n",
"Preparing phone lists and clustering questions\n",
"4 silence phones saved to: data-ESTER-V4/local/dict/silence_phones.txt\n",
"1 optional silence saved to: data-ESTER-V4/local/dict/optional_silence.txt\n",
"36 non-silence phones saved to: data-ESTER-V4/local/dict/nonsilence_phones.txt\n",
"2 extra triphone clustering-related questions saved to: data-ESTER-V4/local/dict/extra_questions.txt\n",
"Lexicon text file saved as: data-ESTER-V4/local/dict/lexicon.txt\n"
]
}
],
"source": [
"#### Prepare dict: add words that don't exist in the dictionary + config files...\n",
"# print number of phonemes used in French\n",
"dir_repos=/fast/LINAGORA/STT/Thesis_aheba\n",
"dir_repos=/home/lingora/Documents/Linagora/kaldi/egs/Linagora/Thesis_aheba\n",
"cat $dir_repos/cmu_dict/fr.dict | awk '{$1=\"\";print $0}' | tr ' ' '\\n' | sort -b | uniq -c\n",
"mkdir -p $idata_kaldi/local/dict/cmudict\n",
"cp $dir_repos/cmu_dict/fr.dict $idata_kaldi/local/dict/fr.dict\n",
"cp $dir_repos/g2p/model-5 $idata_kaldi/local/lm/g2p\n",
"\n",
"local/prepare_dict.sh --stage 0 --nj 4 --cmd \"$train_cmd\" \\\n",
" $idata_kaldi/local/lm $idata_kaldi/local/lm/g2p $idata_kaldi/local/dict"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Prepare L.fst"
]
},
{
"cell_type": "code",
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Checking data-ESTER-V4/local/dict/silence_phones.txt ...\n",
"--> reading data-ESTER-V4/local/dict/silence_phones.txt\n",
"--> data-ESTER-V4/local/dict/silence_phones.txt is OK\n",
"Checking data-ESTER-V4/local/dict/optional_silence.txt ...\n",
"--> reading data-ESTER-V4/local/dict/optional_silence.txt\n",
"--> data-ESTER-V4/local/dict/optional_silence.txt is OK\n",
"Checking data-ESTER-V4/local/dict/nonsilence_phones.txt ...\n",
"--> reading data-ESTER-V4/local/dict/nonsilence_phones.txt\n",
"--> data-ESTER-V4/local/dict/nonsilence_phones.txt is OK\n",
"\n",
"Checking disjoint: silence_phones.txt, nonsilence_phones.txt\n",
"--> disjoint property is OK.\n",
"\n",
"Checking data-ESTER-V4/local/dict/lexicon.txt\n",
"--> reading data-ESTER-V4/local/dict/lexicon.txt\n",
"--> data-ESTER-V4/local/dict/lexicon.txt is OK\n",
"Checking data-ESTER-V4/local/dict/extra_questions.txt ...\n",
"--> reading data-ESTER-V4/local/dict/extra_questions.txt\n",
"--> data-ESTER-V4/local/dict/extra_questions.txt is OK\n",
"--> SUCCESS [validating dictionary directory data-ESTER-V4/local/dict]\n",
"**Creating data-ESTER-V4/local/dict/lexiconp.txt from data-ESTER-V4/local/dict/lexicon.txt\n",
"fstaddselfloops data-ESTER-V4/lang/phones/wdisambig_phones.int data-ESTER-V4/lang/phones/wdisambig_words.int \n",
"prepare_lang.sh: validating output directory\n",
"utils/validate_lang.pl data-ESTER-V4/lang\n",
"Checking data-ESTER-V4/lang/phones.txt ...\n",
"--> data-ESTER-V4/lang/phones.txt is OK\n",
"\n",
"Checking words.txt: #0 ...\n",
"--> data-ESTER-V4/lang/words.txt is OK\n",
"\n",
"Checking disjoint: silence.txt, nonsilence.txt, disambig.txt ...\n",
"--> silence.txt and nonsilence.txt are disjoint\n",
"--> silence.txt and disambig.txt are disjoint\n",
"--> disambig.txt and nonsilence.txt are disjoint\n",
"--> disjoint property is OK\n",
"\n",
"Checking sumation: silence.txt, nonsilence.txt, disambig.txt ...\n",
"--> summation property is OK\n",
"\n",
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
"Checking data-ESTER-V4/lang/phones/context_indep.{txt, int, csl} ...\n",
"--> 20 entry/entries in data-ESTER-V4/lang/phones/context_indep.txt\n",
"--> data-ESTER-V4/lang/phones/context_indep.int corresponds to data-ESTER-V4/lang/phones/context_indep.txt\n",
"--> data-ESTER-V4/lang/phones/context_indep.csl corresponds to data-ESTER-V4/lang/phones/context_indep.txt\n",
"--> data-ESTER-V4/lang/phones/context_indep.{txt, int, csl} are OK\n",
"\n",
"Checking data-ESTER-V4/lang/phones/nonsilence.{txt, int, csl} ...\n",
"--> 144 entry/entries in data-ESTER-V4/lang/phones/nonsilence.txt\n",
"--> data-ESTER-V4/lang/phones/nonsilence.int corresponds to data-ESTER-V4/lang/phones/nonsilence.txt\n",
"--> data-ESTER-V4/lang/phones/nonsilence.csl corresponds to data-ESTER-V4/lang/phones/nonsilence.txt\n",
"--> data-ESTER-V4/lang/phones/nonsilence.{txt, int, csl} are OK\n",
"\n",
"Checking data-ESTER-V4/lang/phones/silence.{txt, int, csl} ...\n",
"--> 20 entry/entries in data-ESTER-V4/lang/phones/silence.txt\n",
"--> data-ESTER-V4/lang/phones/silence.int corresponds to data-ESTER-V4/lang/phones/silence.txt\n",
"--> data-ESTER-V4/lang/phones/silence.csl corresponds to data-ESTER-V4/lang/phones/silence.txt\n",
"--> data-ESTER-V4/lang/phones/silence.{txt, int, csl} are OK\n",
"\n",
"Checking data-ESTER-V4/lang/phones/optional_silence.{txt, int, csl} ...\n",
"--> 1 entry/entries in data-ESTER-V4/lang/phones/optional_silence.txt\n",
"--> data-ESTER-V4/lang/phones/optional_silence.int corresponds to data-ESTER-V4/lang/phones/optional_silence.txt\n",
"--> data-ESTER-V4/lang/phones/optional_silence.csl corresponds to data-ESTER-V4/lang/phones/optional_silence.txt\n",
"--> data-ESTER-V4/lang/phones/optional_silence.{txt, int, csl} are OK\n",
"\n",
"Checking data-ESTER-V4/lang/phones/disambig.{txt, int, csl} ...\n",
"--> 14 entry/entries in data-ESTER-V4/lang/phones/disambig.txt\n",
"--> data-ESTER-V4/lang/phones/disambig.int corresponds to data-ESTER-V4/lang/phones/disambig.txt\n",
"--> data-ESTER-V4/lang/phones/disambig.csl corresponds to data-ESTER-V4/lang/phones/disambig.txt\n",
"--> data-ESTER-V4/lang/phones/disambig.{txt, int, csl} are OK\n",
"\n",
"Checking data-ESTER-V4/lang/phones/roots.{txt, int} ...\n",
"--> 40 entry/entries in data-ESTER-V4/lang/phones/roots.txt\n",
"--> data-ESTER-V4/lang/phones/roots.int corresponds to data-ESTER-V4/lang/phones/roots.txt\n",
"--> data-ESTER-V4/lang/phones/roots.{txt, int} are OK\n",
"\n",
"Checking data-ESTER-V4/lang/phones/sets.{txt, int} ...\n",
"--> 40 entry/entries in data-ESTER-V4/lang/phones/sets.txt\n",
"--> data-ESTER-V4/lang/phones/sets.int corresponds to data-ESTER-V4/lang/phones/sets.txt\n",
"--> data-ESTER-V4/lang/phones/sets.{txt, int} are OK\n",
"\n",
"Checking data-ESTER-V4/lang/phones/extra_questions.{txt, int} ...\n",
"--> 11 entry/entries in data-ESTER-V4/lang/phones/extra_questions.txt\n",
"--> data-ESTER-V4/lang/phones/extra_questions.int corresponds to data-ESTER-V4/lang/phones/extra_questions.txt\n",
"--> data-ESTER-V4/lang/phones/extra_questions.{txt, int} are OK\n",
"\n",
"Checking data-ESTER-V4/lang/phones/word_boundary.{txt, int} ...\n",
"--> 164 entry/entries in data-ESTER-V4/lang/phones/word_boundary.txt\n",
"--> data-ESTER-V4/lang/phones/word_boundary.int corresponds to data-ESTER-V4/lang/phones/word_boundary.txt\n",
"--> data-ESTER-V4/lang/phones/word_boundary.{txt, int} are OK\n",
"\n",
"Checking optional_silence.txt ...\n",
"--> reading data-ESTER-V4/lang/phones/optional_silence.txt\n",
"--> data-ESTER-V4/lang/phones/optional_silence.txt is OK\n",
"\n",
"Checking disambiguation symbols: #0 and #1\n",
"--> data-ESTER-V4/lang/phones/disambig.txt has \"#0\" and \"#1\"\n",
"--> data-ESTER-V4/lang/phones/disambig.txt is OK\n",
"\n",
"Checking topo ...\n",
"\n",
"Checking word_boundary.txt: silence.txt, nonsilence.txt, disambig.txt ...\n",
"--> data-ESTER-V4/lang/phones/word_boundary.txt doesn't include disambiguation symbols\n",
"--> data-ESTER-V4/lang/phones/word_boundary.txt is the union of nonsilence.txt and silence.txt\n",
"--> data-ESTER-V4/lang/phones/word_boundary.txt is OK\n",
"\n",
"Checking word-level disambiguation symbols...\n",
"--> data-ESTER-V4/lang/phones/wdisambig.txt exists (newer prepare_lang.sh)\n",
"Checking word_boundary.int and disambig.int\n",
"--> generating a 36 word sequence\n",
"--> resulting phone sequence from L.fst corresponds to the word sequence\n",
"--> L.fst is OK\n",
"--> generating a 36 word sequence\n",
"--> resulting phone sequence from L_disambig.fst corresponds to the word sequence\n",
"--> L_disambig.fst is OK\n",
"\n",
"Checking data-ESTER-V4/lang/oov.{txt, int} ...\n",
"--> 1 entry/entries in data-ESTER-V4/lang/oov.txt\n",
"--> data-ESTER-V4/lang/oov.int corresponds to data-ESTER-V4/lang/oov.txt\n",
"--> data-ESTER-V4/lang/oov.{txt, int} are OK\n",
"--> data-ESTER-V4/lang/L.fst is olabel sorted\n",
"--> data-ESTER-V4/lang/L_disambig.fst is olabel sorted\n",
"--> SUCCESS [validating lang directory data-ESTER-V4/lang]\n"
]
}
],
"source": [
"#### Prepare Lang ==> L.fst, the vocabulary's finite-state automaton\n",
"utils/prepare_lang.sh $idata_kaldi/local/dict \\\n",
" \"<unk>\" $idata_kaldi/local/lang_tmp $idata_kaldi/lang"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Prepare G.fst"
]
},
{
"cell_type": "code",
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
"arpa2fst --disambig-symbol=#0 --read-symbol-table=data-ESTER-V4/lang_test_mixed/words.txt - data-ESTER-V4/lang_test_mixed/G.fst \n",
"LOG (arpa2fst:Read():arpa-file-parser.cc:90) Reading \\data\\ section.\n",
"LOG (arpa2fst:Read():arpa-file-parser.cc:145) Reading \\1-grams: section.\n",
"WARNING (arpa2fst:Read():arpa-file-parser.cc:192) line 8 [-5.360068\t-ce\t-0.1533034] skipped: word '-ce' not in symbol table\n",
"WARNING (arpa2fst:Read():arpa-file-parser.cc:192) line 9 [-5.018168\t-ci\t-0.2031254] skipped: word '-ci' not in symbol table\n",
"WARNING (arpa2fst:Read():arpa-file-parser.cc:192) line 10 [-4.350668\t-elle\t-0.2460782] skipped: word '-elle' not in symbol table\n",
"WARNING (arpa2fst:Read():arpa-file-parser.cc:192) line 11 [-4.576869\t-elles\t-0.2385286] skipped: word '-elles' not in symbol table\n",
"WARNING (arpa2fst:Read():arpa-file-parser.cc:192) line 12 [-5.052068\t-en\t-0.192779] skipped: word '-en' not in symbol table\n",
"WARNING (arpa2fst:Read():arpa-file-parser.cc:192) line 13 [-4.192168\t-il\t-0.2843367] skipped: word '-il' not in symbol table\n",
"WARNING (arpa2fst:Read():arpa-file-parser.cc:192) line 14 [-4.315768\t-ils\t-0.2745169] skipped: word '-ils' not in symbol table\n",
"WARNING (arpa2fst:Read():arpa-file-parser.cc:192) line 15 [-5.272868\t-je\t-0.1903227] skipped: word '-je' not in symbol table\n",
"WARNING (arpa2fst:Read():arpa-file-parser.cc:192) line 16 [-5.103168\t-la\t-0.1458162] skipped: word '-la' not in symbol table\n",
"WARNING (arpa2fst:Read():arpa-file-parser.cc:192) line 17 [-4.874768\t-le\t-0.2040312] skipped: word '-le' not in symbol table\n",
"WARNING (arpa2fst:Read():arpa-file-parser.cc:192) line 18 [-5.158868\t-les\t-0.1417429] skipped: word '-les' not in symbol table\n",
"WARNING (arpa2fst:Read():arpa-file-parser.cc:192) line 19 [-5.694868\t-lui\t-0.1158859] skipped: word '-lui' not in symbol table\n",
"WARNING (arpa2fst:Read():arpa-file-parser.cc:192) line 20 [-3.746768\t-là\t-0.3350702] skipped: word '-là' not in symbol table\n",
"WARNING (arpa2fst:Read():arpa-file-parser.cc:192) line 21 [-5.209268\t-moi\t-0.2134934] skipped: word '-moi' not in symbol table\n",
"WARNING (arpa2fst:Read():arpa-file-parser.cc:192) line 22 [-4.933168\t-même\t-0.1705472] skipped: word '-même' not in symbol table\n",
"WARNING (arpa2fst:Read():arpa-file-parser.cc:192) line 23 [-5.628368\t-mêmes\t-0.1271463] skipped: word '-mêmes' not in symbol table\n",
"WARNING (arpa2fst:Read():arpa-file-parser.cc:192) line 24 [-4.859869\t-nous\t-0.2231263] skipped: word '-nous' not in symbol table\n",
"WARNING (arpa2fst:Read():arpa-file-parser.cc:192) line 25 [-4.466768\t-on\t-0.2460155] skipped: word '-on' not in symbol table\n",
"WARNING (arpa2fst:Read():arpa-file-parser.cc:192) line 26 [-4.034568\t-t\t-0.4986984] skipped: word '-t' not in symbol table\n",
"WARNING (arpa2fst:Read():arpa-file-parser.cc:192) line 27 [-4.542568\t-t-elle\t-0.1772586] skipped: word '-t-elle' not in symbol table\n",
"WARNING (arpa2fst:Read():arpa-file-parser.cc:192) line 28 [-4.428768\t-t-il\t-0.1768332] skipped: word '-t-il' not in symbol table\n",
"WARNING (arpa2fst:Read():arpa-file-parser.cc:192) line 29 [-5.531868\t-toi\t-0.1358383] skipped: word '-toi' not in symbol table\n",
"WARNING (arpa2fst:Read():arpa-file-parser.cc:192) line 30 [-5.546568\t-tu\t-0.1201105] skipped: word '-tu' not in symbol table\n",
"WARNING (arpa2fst:Read():arpa-file-parser.cc:192) line 31 [-4.782269\t-vous\t-0.2394385] skipped: word '-vous' not in symbol table\n",
"WARNING (arpa2fst:Read():arpa-file-parser.cc:192) line 32 [-5.748569\t-y\t-0.1000024] skipped: word '-y' not in symbol table\n",
"WARNING (arpa2fst:Read():arpa-file-parser.cc:192) line 42 [-5.464468\ta1\t-0.1763418] skipped: word 'a1' not in symbol table\n",
"WARNING (arpa2fst:Read():arpa-file-parser.cc:192) line 43 [-5.834968\ta10\t-0.1255163] skipped: word 'a10' not in symbol table\n",
"WARNING (arpa2fst:Read():arpa-file-parser.cc:192) line 44 [-5.842168\ta104\t-0.09675514] skipped: word 'a104' not in symbol table\n",
"WARNING (arpa2fst:Read():arpa-file-parser.cc:192) line 45 [-6.147268\ta11\t-0.07661788] skipped: word 'a11' not in symbol table\n",
"WARNING (arpa2fst:Read():arpa-file-parser.cc:192) line 46 [-5.948668\ta13\t-0.1366806] skipped: word 'a13' not in symbol table\n",
"LOG (arpa2fst:Read():arpa-file-parser.cc:145) Reading \\2-grams: section.\n",
"LOG (arpa2fst:Read():arpa-file-parser.cc:145) Reading \\3-grams: section.\n",
"WARNING (arpa2fst:Read():arpa-file-parser.cc:231) Of 12719307 parse warnings, 30 were reported. Run program with --max_warnings=-1 to see all warnings\n",
"utils/validate_lang.pl data-ESTER-V4/lang_test_mixed\n",
"Checking data-ESTER-V4/lang_test_mixed/phones.txt ...\n",
"--> data-ESTER-V4/lang_test_mixed/phones.txt is OK\n",
"\n",
"Checking words.txt: #0 ...\n",
"--> data-ESTER-V4/lang_test_mixed/words.txt is OK\n",
"\n",
"Checking disjoint: silence.txt, nonsilence.txt, disambig.txt ...\n",
"--> silence.txt and nonsilence.txt are disjoint\n",
"--> silence.txt and disambig.txt are disjoint\n",
"--> disambig.txt and nonsilence.txt are disjoint\n",
"--> disjoint property is OK\n",
"\n",
"Checking sumation: silence.txt, nonsilence.txt, disambig.txt ...\n",
"--> summation property is OK\n",
"\n",
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
"Checking data-ESTER-V4/lang_test_mixed/phones/context_indep.{txt, int, csl} ...\n",
"--> 20 entry/entries in data-ESTER-V4/lang_test_mixed/phones/context_indep.txt\n",
"--> data-ESTER-V4/lang_test_mixed/phones/context_indep.int corresponds to data-ESTER-V4/lang_test_mixed/phones/context_indep.txt\n",
"--> data-ESTER-V4/lang_test_mixed/phones/context_indep.csl corresponds to data-ESTER-V4/lang_test_mixed/phones/context_indep.txt\n",
"--> data-ESTER-V4/lang_test_mixed/phones/context_indep.{txt, int, csl} are OK\n",
"\n",
"Checking data-ESTER-V4/lang_test_mixed/phones/nonsilence.{txt, int, csl} ...\n",
"--> 144 entry/entries in data-ESTER-V4/lang_test_mixed/phones/nonsilence.txt\n",
"--> data-ESTER-V4/lang_test_mixed/phones/nonsilence.int corresponds to data-ESTER-V4/lang_test_mixed/phones/nonsilence.txt\n",
"--> data-ESTER-V4/lang_test_mixed/phones/nonsilence.csl corresponds to data-ESTER-V4/lang_test_mixed/phones/nonsilence.txt\n",
"--> data-ESTER-V4/lang_test_mixed/phones/nonsilence.{txt, int, csl} are OK\n",
"\n",
"Checking data-ESTER-V4/lang_test_mixed/phones/silence.{txt, int, csl} ...\n",
"--> 20 entry/entries in data-ESTER-V4/lang_test_mixed/phones/silence.txt\n",
"--> data-ESTER-V4/lang_test_mixed/phones/silence.int corresponds to data-ESTER-V4/lang_test_mixed/phones/silence.txt\n",
"--> data-ESTER-V4/lang_test_mixed/phones/silence.csl corresponds to data-ESTER-V4/lang_test_mixed/phones/silence.txt\n",
"--> data-ESTER-V4/lang_test_mixed/phones/silence.{txt, int, csl} are OK\n",
"\n",
"Checking data-ESTER-V4/lang_test_mixed/phones/optional_silence.{txt, int, csl} ...\n",
"--> 1 entry/entries in data-ESTER-V4/lang_test_mixed/phones/optional_silence.txt\n",
"--> data-ESTER-V4/lang_test_mixed/phones/optional_silence.int corresponds to data-ESTER-V4/lang_test_mixed/phones/optional_silence.txt\n",
"--> data-ESTER-V4/lang_test_mixed/phones/optional_silence.csl corresponds to data-ESTER-V4/lang_test_mixed/phones/optional_silence.txt\n",
"--> data-ESTER-V4/lang_test_mixed/phones/optional_silence.{txt, int, csl} are OK\n",
"\n",
"Checking data-ESTER-V4/lang_test_mixed/phones/disambig.{txt, int, csl} ...\n",
"--> 14 entry/entries in data-ESTER-V4/lang_test_mixed/phones/disambig.txt\n",
"--> data-ESTER-V4/lang_test_mixed/phones/disambig.int corresponds to data-ESTER-V4/lang_test_mixed/phones/disambig.txt\n",
"--> data-ESTER-V4/lang_test_mixed/phones/disambig.csl corresponds to data-ESTER-V4/lang_test_mixed/phones/disambig.txt\n",
"--> data-ESTER-V4/lang_test_mixed/phones/disambig.{txt, int, csl} are OK\n",
"\n",
"Checking data-ESTER-V4/lang_test_mixed/phones/roots.{txt, int} ...\n",
"--> 40 entry/entries in data-ESTER-V4/lang_test_mixed/phones/roots.txt\n",
"--> data-ESTER-V4/lang_test_mixed/phones/roots.int corresponds to data-ESTER-V4/lang_test_mixed/phones/roots.txt\n",
"--> data-ESTER-V4/lang_test_mixed/phones/roots.{txt, int} are OK\n",
"\n",
"Checking data-ESTER-V4/lang_test_mixed/phones/sets.{txt, int} ...\n",
"--> 40 entry/entries in data-ESTER-V4/lang_test_mixed/phones/sets.txt\n",
"--> data-ESTER-V4/lang_test_mixed/phones/sets.int corresponds to data-ESTER-V4/lang_test_mixed/phones/sets.txt\n",
"--> data-ESTER-V4/lang_test_mixed/phones/sets.{txt, int} are OK\n",
"\n",
"Checking data-ESTER-V4/lang_test_mixed/phones/extra_questions.{txt, int} ...\n",
"--> 11 entry/entries in data-ESTER-V4/lang_test_mixed/phones/extra_questions.txt\n",
"--> data-ESTER-V4/lang_test_mixed/phones/extra_questions.int corresponds to data-ESTER-V4/lang_test_mixed/phones/extra_questions.txt\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"--> data-ESTER-V4/lang_test_mixed/phones/extra_questions.{txt, int} are OK\n",
"\n",
"Checking data-ESTER-V4/lang_test_mixed/phones/word_boundary.{txt, int} ...\n",
"--> 164 entry/entries in data-ESTER-V4/lang_test_mixed/phones/word_boundary.txt\n",
"--> data-ESTER-V4/lang_test_mixed/phones/word_boundary.int corresponds to data-ESTER-V4/lang_test_mixed/phones/word_boundary.txt\n",
"--> data-ESTER-V4/lang_test_mixed/phones/word_boundary.{txt, int} are OK\n",
"\n",
"Checking optional_silence.txt ...\n",
"--> reading data-ESTER-V4/lang_test_mixed/phones/optional_silence.txt\n",
"--> data-ESTER-V4/lang_test_mixed/phones/optional_silence.txt is OK\n",
"\n",
"Checking disambiguation symbols: #0 and #1\n",
"--> data-ESTER-V4/lang_test_mixed/phones/disambig.txt has \"#0\" and \"#1\"\n",
"--> data-ESTER-V4/lang_test_mixed/phones/disambig.txt is OK\n",
"\n",
"Checking topo ...\n",
"\n",
"Checking word_boundary.txt: silence.txt, nonsilence.txt, disambig.txt ...\n",
"--> data-ESTER-V4/lang_test_mixed/phones/word_boundary.txt doesn't include disambiguation symbols\n",
"--> data-ESTER-V4/lang_test_mixed/phones/word_boundary.txt is the union of nonsilence.txt and silence.txt\n",
"--> data-ESTER-V4/lang_test_mixed/phones/word_boundary.txt is OK\n",
"\n",
"Checking word-level disambiguation symbols...\n",
"--> data-ESTER-V4/lang_test_mixed/phones/wdisambig.txt exists (newer prepare_lang.sh)\n",
"Checking word_boundary.int and disambig.int\n",
"--> generating a 12 word sequence\n",
"--> resulting phone sequence from L.fst corresponds to the word sequence\n",
"--> L.fst is OK\n",
"--> generating a 91 word sequence\n",
"--> resulting phone sequence from L_disambig.fst corresponds to the word sequence\n",
"--> L_disambig.fst is OK\n",
"\n",
"Checking data-ESTER-V4/lang_test_mixed/oov.{txt, int} ...\n",
"--> 1 entry/entries in data-ESTER-V4/lang_test_mixed/oov.txt\n",
"--> data-ESTER-V4/lang_test_mixed/oov.int corresponds to data-ESTER-V4/lang_test_mixed/oov.txt\n",
"--> data-ESTER-V4/lang_test_mixed/oov.{txt, int} are OK\n",
"--> data-ESTER-V4/lang_test_mixed/L.fst is olabel sorted\n",
"--> data-ESTER-V4/lang_test_mixed/L_disambig.fst is olabel sorted\n",
"--> data-ESTER-V4/lang_test_mixed/G.fst is ilabel sorted\n",
"--> data-ESTER-V4/lang_test_mixed/G.fst has 11264129 states\n",
"--> utils/lang/check_g_properties.pl successfully validated data-ESTER-V4/lang_test_mixed/G.fst\n",
"--> utils/lang/check_g_properties.pl succeeded.\n",
"--> SUCCESS [validating lang directory data-ESTER-V4/lang_test_mixed]\n",
"Succeeded in formatting data.\n"
]
}
],
"source": [
"#### Prepare the contextual finite-state automaton from the LMs ===> Build G.fst\n",
"#### Copy the largest LMs built by LIUM and generate G.fst:\n",
"# Large LM\n",
"ln -s $LM_LIUM_LARGE $idata_kaldi/local/lm/\n",
"# Pruned LM\n",
"ln -s $LM_LIUM_SMALL $idata_kaldi/local/lm/\n",
"# Mixed\n",
"dir_eval_lm=$exp_kaldi/eval_LM\n",
"#mv $dir_eval_lm/mixed.gz $dir_eval_lm/lm_mixed.arpa.gz\n",
"mv $dir_eval_lm/lm_mixed.arpa.gz $idata_kaldi/local/lm/\n",
"local/format_lms.sh --src-dir $idata_kaldi/lang $idata_kaldi/local/lm"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": true
},
"source": [
"# Feature extraction\n",
"Try to use & evaluate each Feature"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"steps/make_mfcc.sh --cmd run.pl --mem 64G --nj 32 data-ESTER-V4/data exp-ESTER-V4/make_mfcc/data mfcc\n",
"utils/validate_data_dir.sh: Successfully validated data-directory data-ESTER-V4/data\n",
"steps/make_mfcc.sh [info]: segments file exists: using that.\n",
"Succeeded creating MFCC features for data\n",
"steps/compute_cmvn_stats.sh data-ESTER-V4/data exp-ESTER-V4/make_mfcc/data mfcc\n",
"Succeeded creating CMVN stats for data\n",
"fix_data_dir.sh: kept all 35574 utterances.\n",
"fix_data_dir.sh: old files are kept in data-ESTER-V4/data/.backup\n",
"steps/make_mfcc.sh --cmd run.pl --mem 64G --nj 32 data-ESTER-V4/DATA exp-ESTER-V4/make_mfcc/DATA mfcc\n",
"utils/validate_data_dir.sh: Successfully validated data-directory data-ESTER-V4/DATA\n",
"steps/make_mfcc.sh [info]: segments file exists: using that.\n",
"Succeeded creating MFCC features for DATA\n",
"steps/compute_cmvn_stats.sh data-ESTER-V4/DATA exp-ESTER-V4/make_mfcc/DATA mfcc\n",
"Succeeded creating CMVN stats for DATA\n",
"fix_data_dir.sh: kept all 10486 utterances.\n",
"fix_data_dir.sh: old files are kept in data-ESTER-V4/DATA/.backup\n",
"steps/make_mfcc.sh --cmd run.pl --mem 64G --nj 32 data-ESTER-V4/DATA2 exp-ESTER-V4/make_mfcc/DATA2 mfcc\n",
"utils/validate_data_dir.sh: Successfully validated data-directory data-ESTER-V4/DATA2\n",
"steps/make_mfcc.sh [info]: segments file exists: using that.\n",
"It seems not all of the feature files were successfully processed (57340 != 57341);\n",
"consider using utils/fix_data_dir.sh data-ESTER-V4/DATA2\n",
"Succeeded creating MFCC features for DATA2\n",
"steps/compute_cmvn_stats.sh data-ESTER-V4/DATA2 exp-ESTER-V4/make_mfcc/DATA2 mfcc\n",
"Succeeded creating CMVN stats for DATA2\n",
"fix_data_dir.sh: kept 57340 utterances out of 57341\n",
"fix_data_dir.sh: old files are kept in data-ESTER-V4/DATA2/.backup\n",
"utils/combine_data.sh data-ESTER-V4/ESTER_All data-ESTER-V4/data data-ESTER-V4/DATA2\n",
"utils/combine_data.sh [info]: not combining utt2uniq as it does not exist\n",
"utils/combine_data.sh: combined segments\n",
"utils/combine_data.sh: combined utt2spk\n",
"utils/combine_data.sh [info]: not combining utt2lang as it does not exist\n",
"utils/combine_data.sh: combined utt2dur\n",
"utils/combine_data.sh: combined feats.scp\n",
"utils/combine_data.sh: combined text\n",
"utils/combine_data.sh: combined cmvn.scp\n",
"utils/combine_data.sh [info]: not combining reco2file_and_channel as it does not exist\n",
"utils/combine_data.sh: combined wav.scp\n",
"utils/combine_data.sh: combined spk2gender\n",
"fix_data_dir.sh: kept all 92914 utterances.\n",
"fix_data_dir.sh: old files are kept in data-ESTER-V4/ESTER_All/.backup\n"
]
}
],
"source": [
"# Feature Extraction MFCC:\n",
"mfccdir=mfcc\n",
"for part in data DATA DATA2; do\n",
" steps/make_mfcc.sh --cmd \"$train_cmd\" --nj 32 $idata_kaldi/$part $exp_kaldi/make_mfcc/$part $mfccdir\n",
" #steps/make_mfcc_pitch.sh --cmd \"$train_cmd\" --nj 12 $idata_kaldi/$part $exp_kaldi/make_mfcc/$part $mfccdir\n",
" steps/compute_cmvn_stats.sh $idata_kaldi/$part $exp_kaldi/make_mfcc/$part $mfccdir\n",
" utils/fix_data_dir.sh $idata_kaldi/$part\n",
"done\n",
"# Combine data ESTER Phase 1 & Phase 2\n",
"utils/combine_data.sh $idata_kaldi/ESTER_All $idata_kaldi/data $idata_kaldi/DATA2"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Feature Extraction PLP:\n",
"plpdir=plp\n",
"for part in data DATA DATA2; do\n",
" #PLP features\n",
" steps/make_plp.sh --cmd \"$train_cmd\" --nj 32 $idata_kaldi/$part $exp_kaldi/make_plp/$part $plpdir\n",
" steps/make_plp_pitch.sh --cmd \"$train_cmd\" --nj 32 $idata_kaldi/$part $exp_kaldi/make_plp/$part $plpdir\n",
" steps/compute_cmvn_stats.sh $idata_kaldi/$part $exp_kaldi/make_plp/$part $plpdir\n",
" #Fbank\n",
" #steps/make_fbank.sh --cmd \"$train_cmd\" --nj 4 $idata_kaldi/$part $exp_kaldi/make_fbank/$part $fbankdir\n",
" #steps/compute_cmvn_stats.sh $idata_kaldi/$part $exp_kaldi/make_fbank/$part $fbankdir\n",
"done\n",
"# Combine data ESTER Phase 1 & Phase 2\n",
"utils/combine_data.sh $idata_kaldi/ESTER_All $idata_kaldi/data $idata_kaldi/DATA2"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Feature extraction Fbanks:\n",
"fbankdir=fbank\n",
"for part in data DATA DATA2; do\n",
" #Fbank\n",
" steps/make_fbank.sh --cmd \"$train_cmd\" --nj 32 $idata_kaldi/$part $exp_kaldi/make_fbank/$part $fbankdir\n",
" steps/make_fbank_pitch.sh --cmd \"$train_cmd\" --nj 32 $idata_kaldi/$part $exp_kaldi/make_fbank/$part $fbankdir\n",
" steps/compute_cmvn_stats.sh $idata_kaldi/$part $exp_kaldi/make_fbank/$part $fbankdir\n",
"done\n",
"# Combine data ESTER Phase 1 & Phase 2\n",
"utils/combine_data.sh $idata_kaldi/ESTER_All $idata_kaldi/data $idata_kaldi/DATA2"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# obviously, if you use this, consider that you have some wrong segmentation in your clean data...\n",
"#utils/fix_data_dir.sh $idata_kaldi/data\n",
"#utils/fix_data_dir.sh $idata_kaldi/meeting_best_microsoft\n",
"#utils/fix_data_dir.sh $idata_kaldi/meeting_test"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Split Data for training phases\n",
"Use --shortest for taking utt in accendent order (sorted by duration !)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"feat-to-len scp:data-ESTER-V4/data/feats.scp ark,t:data-ESTER-V4/data_1kshort/tmp.len \n",
"sort: échec d'écriture: 'sortie standard': Relais brisé (pipe)\n",
"sort: erreur d'écriture\n",
"utils/subset_data_dir.sh: reducing #utt from 35574 to 1000\n",
"feat-to-len scp:data-ESTER-V4/data/feats.scp ark,t:data-ESTER-V4/data_5kshort/tmp.len \n",
"sort: échec d'écriture: 'sortie standard': Relais brisé (pipe)\n",
"sort: erreur d'écriture\n",
"utils/subset_data_dir.sh: reducing #utt from 35574 to 5000\n",
"feat-to-len scp:data-ESTER-V4/data/feats.scp ark,t:data-ESTER-V4/data_10kshort/tmp.len \n",
"sort: échec d'écriture: 'sortie standard': Relais brisé (pipe)\n",
"sort: erreur d'écriture\n",
"utils/subset_data_dir.sh: reducing #utt from 35574 to 10000\n",
"feat-to-len scp:data-ESTER-V4/data/feats.scp ark,t:data-ESTER-V4/data_15kshort/tmp.len \n",
"sort: échec d'écriture: 'sortie standard': Relais brisé (pipe)\n",
"sort: erreur d'écriture\n",
"utils/subset_data_dir.sh: reducing #utt from 35574 to 15000\n"
]
}
],
"source": [
"# # Make some small data subsets for early system-build stages. Note, there are 29k\n",
"# # utterances in the train_clean_100 directory which has 100 hours of data.\n",
"# # For the monophone stages we select the shortest utterances, which should make it\n",
"# # easier to align the data from a flat start.\n",
"utils/subset_data_dir.sh --shortest $idata_kaldi/data 1000 $idata_kaldi/data_1kshort\n",
"utils/subset_data_dir.sh --shortest $idata_kaldi/data 5000 $idata_kaldi/data_5kshort\n",
"utils/subset_data_dir.sh --shortest $idata_kaldi/data 10000 $idata_kaldi/data_10kshort\n",
"utils/subset_data_dir.sh --shortest $idata_kaldi/data 15000 $idata_kaldi/data_15kshort\n",
"#utils/subset_data_dir.sh --shortest $idata_kaldi/data 15000 $idata_kaldi/data_15kshort\n",
"#utils/subset_data_dir.sh $idata_kaldi/data 20000 $idata_kaldi/data_20k\n",
"#utils/subset_data_dir.sh $idata_kaldi/data 25000 $idata_kaldi/data_25k"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Train different monophone system with different size\n",
"Analyse number of gaussian and state used!!!\n",
"and evaluate this first step"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"steps/train_mono.sh --boost-silence 1.25 --nj 32 --cmd run.pl --mem 64G data-ESTER-V2-noise/data_1kshort data-ESTER-V2-noise/lang exp-ESTER-V2-noise/mono1K\n",
"filter_scps.pl: warning: some input lines were output to multiple files\n",
"steps/train_mono.sh: Initializing monophone system.\n",
"steps/train_mono.sh: Compiling training graphs\n",
"steps/train_mono.sh: Aligning data equally (pass 0)\n",
"steps/train_mono.sh: Pass 1\n",
"steps/train_mono.sh: Aligning data\n",
"steps/train_mono.sh: Pass 2\n",
"steps/train_mono.sh: Aligning data\n",
"steps/train_mono.sh: Pass 3\n",
"steps/train_mono.sh: Aligning data\n",
"steps/train_mono.sh: Pass 4\n",
"steps/train_mono.sh: Aligning data\n",
"steps/train_mono.sh: Pass 5\n",
"steps/train_mono.sh: Aligning data\n",
"steps/train_mono.sh: Pass 6\n",
"steps/train_mono.sh: Aligning data\n",
"steps/train_mono.sh: Pass 7\n",
"steps/train_mono.sh: Aligning data\n",
"steps/train_mono.sh: Pass 8\n",
"steps/train_mono.sh: Aligning data\n",
"steps/train_mono.sh: Pass 9\n",
"steps/train_mono.sh: Aligning data\n",
"steps/train_mono.sh: Pass 10\n",
"steps/train_mono.sh: Aligning data\n",
"steps/train_mono.sh: Pass 11\n",
"steps/train_mono.sh: Pass 12\n",
"steps/train_mono.sh: Aligning data\n",
"steps/train_mono.sh: Pass 13\n",
"steps/train_mono.sh: Pass 14\n",
"steps/train_mono.sh: Aligning data\n",
"steps/train_mono.sh: Pass 15\n",
"steps/train_mono.sh: Pass 16\n",
"steps/train_mono.sh: Aligning data\n",
"steps/train_mono.sh: Pass 17\n",
"steps/train_mono.sh: Pass 18\n",
"steps/train_mono.sh: Aligning data\n",
"steps/train_mono.sh: Pass 19\n",
"steps/train_mono.sh: Pass 20\n",
"steps/train_mono.sh: Aligning data\n",
"steps/train_mono.sh: Pass 21\n",
"steps/train_mono.sh: Pass 22\n",
"steps/train_mono.sh: Pass 23\n",
"steps/train_mono.sh: Aligning data\n",
"steps/train_mono.sh: Pass 24\n",
"steps/train_mono.sh: Pass 25\n",
"steps/train_mono.sh: Pass 26\n",
"steps/train_mono.sh: Aligning data\n",
"steps/train_mono.sh: Pass 27\n",
"steps/train_mono.sh: Pass 28\n",
"steps/train_mono.sh: Pass 29\n",
"steps/train_mono.sh: Aligning data\n",
"steps/train_mono.sh: Pass 30\n",
"steps/train_mono.sh: Pass 31\n",
"steps/train_mono.sh: Pass 32\n",
"steps/train_mono.sh: Aligning data\n",
"steps/train_mono.sh: Pass 33\n",
"steps/train_mono.sh: Pass 34\n",
"steps/train_mono.sh: Pass 35\n",
"steps/train_mono.sh: Aligning data\n",
"steps/train_mono.sh: Pass 36\n",
"steps/train_mono.sh: Pass 37\n",
"steps/train_mono.sh: Pass 38\n",
"steps/train_mono.sh: Aligning data\n",
"steps/train_mono.sh: Pass 39\n",
"steps/diagnostic/analyze_alignments.sh --cmd run.pl --mem 64G data-ESTER-V2-noise/lang exp-ESTER-V2-noise/mono1K\n",
"analyze_phone_length_stats.py: WARNING: optional-silence SIL is seen only 41.9% of the time at utterance begin. This may not be optimal.\n",
"analyze_phone_length_stats.py: WARNING: optional-silence SIL is seen only 57.4% of the time at utterance end. This may not be optimal.\n",
"steps/diagnostic/analyze_alignments.sh: see stats in exp-ESTER-V2-noise/mono1K/log/analyze_alignments.log\n",
"3 warnings in exp-ESTER-V2-noise/mono1K/log/init.log\n",
"2 warnings in exp-ESTER-V2-noise/mono1K/log/analyze_alignments.log\n",
"1070 warnings in exp-ESTER-V2-noise/mono1K/log/align.*.*.log\n",
"122 warnings in exp-ESTER-V2-noise/mono1K/log/update.*.log\n",
"exp-ESTER-V2-noise/mono1K: nj=32 align prob=-97.99 over 0.24h [retry=1.2%, fail=0.0%] states=128 gauss=1001\n",
"steps/train_mono.sh: Done training monophone system in exp-ESTER-V2-noise/mono1K\n"
]
}
],
"source": [