Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Open sidebar
LINAGORA
L
LGS
Labs
kaldi-modelgen
Commits
24bd1bef
Commit
24bd1bef
authored
Aug 22, 2017
by
Abdelwahab HEBA
Browse files
clean scoring & add noise labelisation
parent
dc5d4274
Changes
7
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
160 additions
and
79 deletions
+160
-79
cmu_dict/fr.dict
cmu_dict/fr.dict
+2
-0
local/format_lms.sh
local/format_lms.sh
+2
-1
local/lm/parseESTERSyncV2_text.py
local/lm/parseESTERSyncV2_text.py
+1
-1
local/parseESTERSyncV2.py
local/parseESTERSyncV2.py
+11
-11
local/prepare_dict.sh
local/prepare_dict.sh
+7
-4
local/score.sh
local/score.sh
+4
-4
run.sh
run.sh
+133
-58
No files found.
cmu_dict/fr.dict
View file @
24bd1bef
...
...
@@ -45119,6 +45119,7 @@ hdw aa ch dd ei dd ou bb ll vv ei
hdz aa ch dd ai zz ai dd
hdz(2) aa ch dd ei zz ai dd
he ee
he' ee
head ii dd
health ai ll ff
hearst oe rr ss tt
...
...
@@ -102183,6 +102184,7 @@ zülle zz uu ll
âpreté aa pp rr ee tt ei
âtre aa tt rr
âtre(2) aa tt rr ee
ç ss
ça ss aa
çà ss aa
çà_et_là ss aa ei ll aa
local/format_lms.sh
View file @
24bd1bef
...
...
@@ -43,7 +43,8 @@ trap "rm -r $tmpdir" EXIT
mkdir
-p
$tmpdir
#for lm_suffix in tgsmall tgmed tglarge fglarge; do
for
lm_suffix
in
tglarge french-small tgmix
;
do
for
lm_suffix
in
tglarge french-small
;
do
#for lm_suffix in linto1; do
# tglarge is prepared by a separate command, called from run.sh; we don't
# want to compile G.fst for tglarge, as it takes a while.
test
=
${
src_dir
}
_test_
${
lm_suffix
}
...
...
local/lm/parseESTERSyncV2_text.py
View file @
24bd1bef
...
...
@@ -253,7 +253,7 @@ if __name__=="__main__":
#print("Je rentre dans has_attrib_speaker et element.tail not null")
#print(str(Element.tag))
#print(str(Element.tail))
if
Element
.
tag
==
"Sync"
or
Element
.
tag
==
"Background"
:
if
Element
.
tag
==
"Sync"
:
#print("Je rentre Sync+Background"+ text +"| et le next c'est "+ Element.tail)
#print(Element.tag+" "+Element.tail)
Time_start_current_sync
=
Element
.
get
(
'time'
)
...
...
local/parseESTERSyncV2.py
View file @
24bd1bef
...
...
@@ -263,7 +263,7 @@ if __name__=="__main__":
#print("Je rentre dans has_attrib_speaker et element.tail not null")
#print(str(Element.tag))
#print(str(Element.tail))
if
Element
.
tag
==
"Sync"
or
Element
.
tag
==
"Background"
:
if
Element
.
tag
==
"Sync"
:
#print("Je rentre Sync+Background"+ text +"| et le next c'est "+ Element.tail)
#print(Element.tag+" "+Element.tail)
Time_start_current_sync
=
Element
.
get
(
'time'
)
...
...
@@ -300,27 +300,27 @@ if __name__=="__main__":
# if Element.get('type')=='noise':
# ===== Respiration
if
Element
.
get
(
'desc'
)
==
'r'
or
Element
.
get
(
'desc'
)
==
'i'
or
Element
.
get
(
'desc'
)
==
'e'
or
Element
.
get
(
'desc'
)
==
'n'
:
text
=
text
+
" "
+
Element
.
tail
.
replace
(
'
\n
'
,
''
)
text
=
text
+
"
<breath>
"
+
Element
.
tail
.
replace
(
'
\n
'
,
''
)
elif
Element
.
get
(
'desc'
)
==
'pf'
:
text
=
text
+
" "
+
Element
.
tail
.
replace
(
'
\n
'
,
''
)
text
=
text
+
"
<blowshard>
"
+
Element
.
tail
.
replace
(
'
\n
'
,
''
)
# ===== Bruits bouches
elif
Element
.
get
(
'desc'
)
==
'tx'
:
text
=
text
+
" "
+
Element
.
tail
.
replace
(
'
\n
'
,
''
)
text
=
text
+
"
<cough>
"
+
Element
.
tail
.
replace
(
'
\n
'
,
''
)
elif
Element
.
get
(
'desc'
)
==
'bg'
:
text
=
text
+
" "
+
Element
.
tail
.
replace
(
'
\n
'
,
''
)
text
=
text
+
"
<glottisblow>
"
+
Element
.
tail
.
replace
(
'
\n
'
,
''
)
elif
Element
.
get
(
'desc'
)
==
'bb'
:
text
=
text
+
" "
+
Element
.
tail
.
replace
(
'
\n
'
,
''
)
text
=
text
+
"
<noisemouth>
"
+
Element
.
tail
.
replace
(
'
\n
'
,
''
)
elif
Element
.
get
(
'desc'
)
==
'rire'
:
text
=
text
+
" "
+
Element
.
tail
.
replace
(
'
\n
'
,
''
)
text
=
text
+
"
<laugh>
"
+
Element
.
tail
.
replace
(
'
\n
'
,
''
)
elif
Element
.
get
(
'desc'
)
==
'sif'
:
text
=
text
+
" "
+
Element
.
tail
.
replace
(
'
\n
'
,
''
)
text
=
text
+
"
<whistling>
"
+
Element
.
tail
.
replace
(
'
\n
'
,
''
)
elif
Element
.
get
(
'desc'
)
==
'ch'
or
Element
.
get
(
'desc'
)
==
'ch-'
:
text
=
text
+
" "
+
Element
.
tail
.
replace
(
'
\n
'
,
''
)
text
=
text
+
"
<whisperedvoice>
"
+
Element
.
tail
.
replace
(
'
\n
'
,
''
)
# ====== Bruit exterieus a l'acte de parole
elif
Element
.
get
(
'desc'
)
==
'b'
or
Element
.
get
(
'desc'
)
==
'pap'
or
Element
.
get
(
'desc'
)
==
'mic'
or
Element
.
get
(
'desc'
)
==
'conv'
:
text
=
text
+
" "
+
Element
.
tail
.
replace
(
'
\n
'
,
''
)
text
=
text
+
"
<noise>
"
+
Element
.
tail
.
replace
(
'
\n
'
,
''
)
elif
Element
.
get
(
'desc'
)
==
'top'
:
text
=
text
+
" "
+
Element
.
tail
.
replace
(
'
\n
'
,
''
)
text
=
text
+
"
<top>
"
+
Element
.
tail
.
replace
(
'
\n
'
,
''
)
# "pi" intellegible "pif" inaudible voir doc transcriber
#elif Element.get('type')=='pronounce':
# text=text+" "+Element.tail.replace('\n', '')
...
...
local/prepare_dict.sh
View file @
24bd1bef
...
...
@@ -117,6 +117,7 @@ if [ $stage -le 3 ]; then
echo
"Preparing phone lists and clustering questions"
(
echo
SIL
;
echo
SPN
;
echo
NSN
;
echo
LAU
;
)
>
$silence_phones
#(echo SIL; echo SPN;) > $silence_phones
echo
SIL
>
$optional_silence
# nonsilence phones; on each line is a list of phones that correspond
# really to the same base phone.
...
...
@@ -140,12 +141,14 @@ if [ $stage -le 3 ]; then
fi
if
[
$stage
-le
4
]
;
then
# TCOF
#(echo '!sil SIL'; echo '<spoken_noise> SPN'; echo '<UNK> SPN'; echo '<laugh> LAU'; echo '<noise> NSN') |\
#(echo '<unk> SPN'; echo '<laugh> LAU'; echo '<noise> NSN'; echo '<top> NSN';\
# echo '<whispered_voice> NSN'; echo '<breath> SPN'; echo '<blows_hard> NSN'; echo '<cough> SPN'; echo '<glottis_blow> SPN';\
# echo '<noise_mouth> SPN';echo '<whistling> NSN' ) |\
# ESTER
(
echo
'<unk> SPN'
;
echo
'<laugh> LAU'
;
echo
'<noise> NSN'
;
echo
'<top> NSN'
;
\
echo
'<whisperedvoice> NSN'
;
echo
'<noisemouth>'
SPN
;
echo
'<breath> SPN'
;
echo
'<blowshard> NSN'
;
echo
'<cough> SPN'
;
echo
'<glottisblow> SPN'
;
\
echo
'<noisemouth> SPN'
;
echo
'<whistling> NSN'
)
|
\
# ESTER without noise states
(
echo
'!sil SIL'
)
|
\
#
(echo '!sil SIL'
; echo '<UNK> SPN'
) |\
cat
-
$lexicon_raw_nosil
|
sort
|
uniq
>
$dst_dir
/lexicon.txt
echo
"Lexicon text file saved as:
$dst_dir
/lexicon.txt"
fi
...
...
local/score.sh
View file @
24bd1bef
...
...
@@ -42,11 +42,11 @@ done
mkdir
-p
$dir
/scoring/log
# A changer suivant les balises utilisees dans la normalisation
# TCOF
cat
$data
/text |
sed
's:<noise>::g'
|
sed
's:<spoken_noise>::g'
|
sed
's:<laugh>::g'
>
$dir
/scoring/test_filt.txt
#
cat $data/text | sed 's:<noise>::g' | sed 's:<spoken_noise>::g' | sed 's:<laugh>::g' > $dir/scoring/test_filt.txt
# ESTER
#
cat $data/text | sed 's:<noise>::g' | sed 's:<breath>::g' | sed 's:<laugh>::g' |\
#
sed 's:<blows
_
hard>::g' | sed 's:<cough>::g' | sed 's:<glottis
_
blow>::g' | sed 's:<noise
_
mouth>::g' |\
#
sed 's:<whistling>::g' | sed 's:<whispered
_
voice>::g' | sed 's:<top>::g' > $dir/scoring/test_filt.txt
cat
$data
/text |
sed
's:<noise>::g'
|
sed
's:<breath>::g'
|
sed
's:<laugh>::g'
|
\
sed
's:<blowshard>::g'
|
sed
's:<cough>::g'
|
sed
's:<glottisblow>::g'
|
sed
's:<noisemouth>::g'
|
\
sed
's:<whistling>::g'
|
sed
's:<whisperedvoice>::g'
|
sed
's:<top>::g'
|
sed
's:<breath>::g'
>
$dir
/scoring/test_filt.txt
for
wip
in
$(
echo
$word_ins_penalty
|
sed
's/,/ /g'
)
;
do
$cmd
LMWT
=
$min_lmwt
:
$max_lmwt
$dir
/scoring/log/best_path.LMWT.
$wip
.log
\
lattice-scale
--inv-acoustic-scale
=
LMWT
"ark:gunzip -c
$dir
/lat.*.gz|"
ark:-
\|
\
...
...
run.sh
View file @
24bd1bef
S
#!/bin/bash
#!/bin/bash
# Copyright 2017 Abdel HEBA @Linagora
# Pense a ajouter utils/fix_data_dir.sh data/test to fix utterance error
# Running on Koios J=12
# data dir
#
.
./cmd.sh
.
./path.sh
idata_kaldi
=
data-microsoft-mfcc
exp_kaldi
=
exp-microsoft-mfcc
# you might not want to do this for interactive shells.
#set -e
# format the data as Kaldi data directories
#train dev
# TCOF
# Data prepare: TCOF - ESTER
# TCOF:
idata_kaldi
=
data-microsoft-mfcc
exp_kaldi
=
exp-microsoft-mfcc
data
=
/home/lingora/Documents/Linagora/Data/Tcof/tcof/3/Corpus
LM_train_text
=
/home/lingora/Documents/Linagora/Data/Tcof/tcof/3/Corpus/train
for
part
in
meeting_best_microsoft meeting_test
;
do
# ESTER:
data
=
/home/lingora/Documents/Linagora/Data/ESTER/Corpus/ESTER/DGA/Phase1
idata_kaldi
=
data-ESTER-V2-noise
exp_kaldi
=
exp-ESTER-V2-noise
for
part
in
data
;
do
# use underscore-separated names in data directories.
echo
"prepare
$part
"
local
/data_prepTCOF.sh
$data
/
$part
$idata_kaldi
/
$part
#local/data_prepTCOF.sh $data/$part $idata_kaldi/$part
# probleme event (URL:)
local
/data_prepESTER.sh
$data
/
$part
$idata_kaldi
/
$part
done
# Evaluate SNR for each segment
evaluate_snr
=
eval-snr
mkdir
eval-snr
for
part
in
meeting_best_microsoft meeting_test
;
do
#evaluate_snr=eval-snr
#mkdir eval-snr
#for part in meeting_best_microsoft meeting_test; do
# echo "Evaluate $part"
# local/evaluation/evaluate_snr.sh $idata_kaldi/$part $evaluate_snr
#done
# Evaluate SNR for each segment of ESTER
evaluate_snr
=
eval-snr-ESTER/Eval2005
mkdir
-p
$evaluate_snr
for
part
in
data
;
do
echo
"Evaluate
$part
"
local
/evaluation/evaluate_snr.sh
$idata_kaldi
/
$part
$evaluate_snr
done
...
...
@@ -44,33 +53,44 @@ done
# ## well as some intermediate data(e.g. the normalized text used for LM training),
# ## are available for download at http://www.openslr.org/11/
# OOOOOOK Train_lm
# TCOF
#LM_train_text=/home/lingora/Documents/Linagora/Data/Tcof/tcof/3/Corpus/train
# ESTER
LM_train_text
=
/home/lingora/Documents/Linagora/Data/ESTER/Corpus/ESTER/DGA/Phase1/data
local
/lm/train_lm.sh
$LM_train_text
\
$idata_kaldi
/local/lm/norm/tmp
$idata_kaldi
/local/lm/norm/norm_texts
$idata_kaldi
/local/lm
# check characters:
# awk '{for(i=1;i<=NF;i++)if(!a[$i]++)print $i"\n"}' ORS= FS= $idata_kaldi/local/lm/meeting-vocab.txt | sort -b
# Learning Grapheme to phonem
## Optional G2P training scripts.
## As the LM training scripts above, this script is intended primarily to
## document our G2P model creation process
# OOOOOOk g2p
# OOOOOOk g2p
: done
#local/g2p/train_g2p.sh cmu_dict data/local/lm
##### OOOOOOK
# # when "--stage 3" option is used below we skip the G2P steps, and use the
# # if lexicon are already downloaded from Elyes's works then Stage=3 else Stage=0
# print number of phonem used in french
cat
cmu_dict/fr.dict |
awk
'{$1="";print $0}'
|
tr
' '
'\n'
|
sort
-b
|
uniq
-c
mkdir
-p
$idata_kaldi
/local/dict/cmudict
cp
cmu_dict/fr.dict
$idata_kaldi
/local/dict/fr.dict
#cp cmu_dict/fr.dict data/local/dict/cmudict
local
/prepare_dict.sh
--stage
3
--nj
1
--cmd
"
$train_cmd
"
\
$idata_kaldi
/local/lm
$idata_kaldi
/local/lm
$idata_kaldi
/local/dict
mkdir
-p
$idata_kaldi
/local/lm/g2p
cp
g2p/model-5
$idata_kaldi
/local/lm/g2p
###### Prepare dict: add words which doesn't exist in dictionnary + config files...
local
/prepare_dict.sh
--stage
0
--nj
4
--cmd
"
$train_cmd
"
\
$idata_kaldi
/local/lm
$idata_kaldi
/local/lm/g2p
$idata_kaldi
/local/dict
###### OOOOOOK
utils/prepare_lang.sh
$idata_kaldi
/local/dict
\
"<
UNK
>"
$idata_kaldi
/local/lang_tmp
$idata_kaldi
/lang
"<
unk
>"
$idata_kaldi
/local/lang_tmp
$idata_kaldi
/lang
export
LC_ALL
=
fr_FR.UTF-8
###### OOOOOOK
local
/format_lms.sh
--src-dir
$idata_kaldi
/lang
$idata_kaldi
/local/lm
local
/format_lms.sh
--src-dir
$idata_kaldi
/lang
$idata_kaldi
/local/lm
# # Create ConstArpaLm format language model for full 3-gram and 4-gram LMs
#utils/build_const_arpa_lm.sh data/local/lm/lm_tglarge.arpa.gz \
...
...
@@ -81,7 +101,8 @@ export LC_ALL=fr_FR.UTF-8
mfccdir
=
mfcc
plpdir
=
plp
fbankdir
=
fbank
for
part
in
meeting_best_microsoft meeting_test
;
do
#for part in meeting_best_microsoft meeting_test; do
for
part
in
data
;
do
#MFCC features
steps/make_mfcc.sh
--cmd
"
$train_cmd
"
--nj
4
$idata_kaldi
/
$part
$exp_kaldi
/make_mfcc/
$part
$mfccdir
steps/compute_cmvn_stats.sh
$idata_kaldi
/
$part
$exp_kaldi
/make_mfcc/
$part
$mfccdir
...
...
@@ -92,23 +113,26 @@ for part in meeting_best_microsoft meeting_test; do
#steps/make_fbank.sh --cmd "$train_cmd" --nj 4 $idata_kaldi/$part $exp_kaldi/make_fbank/$part $fbankdir
#steps/compute_cmvn_stats.sh $idata_kaldi/$part $exp_kaldi/make_fbank/$part $fbankdir
done
utils/fix_data_dir.sh
$idata_kaldi
/data
utils/fix_data_dir.sh
$idata_kaldi
/meeting_best_microsoft
utils/fix_data_dir.sh
$idata_kaldi
/meeting_test
# # Make some small data subsets for early system-build stages. Note, there are 29k
# # utterances in the train_clean_100 directory which has 100 hours of data.
# # For the monophone stages we select the shortest utterances, which should make it
# # easier to align the data from a flat start.
#utils/subset_data_dir.sh --shortest $idata_kaldi/train 15000 $idata_kaldi/train_15kshort
#utils/subset_data_dir.sh --shortest $idata_kaldi/train 1000 $idata_kaldi/train_1kshort
#utils/subset_data_dir.sh --shortest $idata_kaldi/train 70000 $idata_kaldi/train_70kshort
#utils/subset_data_dir.sh $idata_kaldi/train 120000 $idata_kaldi/train_120k
#utils/subset_data_dir.sh data/train 120000 data/train_120k
utils/subset_data_dir.sh
--shortest
$idata_kaldi
/data 1000
$idata_kaldi
/data_1kshort
utils/subset_data_dir.sh
--shortest
$idata_kaldi
/data 5000
$idata_kaldi
/data_5kshort
utils/subset_data_dir.sh
--shortest
$idata_kaldi
/data 10000
$idata_kaldi
/data_10kshort
utils/subset_data_dir.sh
--shortest
$idata_kaldi
/data 15000
$idata_kaldi
/data_15kshort
utils/subset_data_dir.sh
--shortest
$idata_kaldi
/data 15000
$idata_kaldi
/data_15kshort
utils/subset_data_dir.sh
$idata_kaldi
/data 20000
$idata_kaldi
/data_20k
utils/subset_data_dir.sh
$idata_kaldi
/data 25000
$idata_kaldi
/data_25k
# # train a monophone system
exp_mono
=
$exp_kaldi
/mono
_selected_microsoft
exp_mono
=
$exp_kaldi
/mono
10K
steps/train_mono.sh
--boost-silence
1.25
--nj
4
--cmd
"
$train_cmd
"
\
$idata_kaldi
/
meeting_best_microsof
t
$idata_kaldi
/lang
$exp_mono
$idata_kaldi
/
data_10kshor
t
$idata_kaldi
/lang
$exp_mono
# OK Jusqu'au monophone
# =================================================
# =================================================
# Evaluate PER for each meeting in training set
...
...
@@ -157,8 +181,8 @@ done
echo
"Filename,%WER,%nbWER,ins,del,sub"
>
$dir_evaluation
/evaluate_WER/WER_per_meeting.csv
for
test
in
meeting_test
;
do
# Decode WER
steps/decode.sh
--nj
2
--cmd
"
$decode_cmd
"
$exp_mono
/graph_
tglarge
\
$idata_kaldi
/
$test
$exp_mono
/decode_
tglarge
_
$test
steps/decode.sh
--nj
4
--cmd
"
$decode_cmd
"
$exp_mono
/graph_
french-small
\
$idata_kaldi
/
$test
$exp_mono
/decode_
french-small
_
$test
# Evaluate WER for each meeting in $ test
# symtab=$exp_mono/graph_tglarge/words.txt
# find $data/$test -mindepth 1 -maxdepth 1 -type d > $dir_evaluation/meeting_in_$test.txt
...
...
@@ -193,17 +217,27 @@ cat exp-eval/Evaluation/ppl_only/3gmixfrsmall_dev_test.csv | awk 'BEGIN{FS=",";O
paste
-d
, exp-eval/Evaluation/final_evaluation.csv exp-eval/Evaluation/evaluate_3gfrench-small.csv
paste
-d
, exp-eval/Evaluation/final_evaluation.csv exp-eval/Evaluation/lm_tg100h.csv
\
>
exp-eval/Evaluation/Final-eval/final_evaluation_lm_tg_100h.csv
steps/align_si.sh
--boost-silence
1.25
--nj
5
--cmd
"
$train_cmd
"
\
data-valid/train_file data-valid/lang exp-valid/mono exp-valid/mono_ali
# =========================== TRIPHONE =======================
# Align data
steps/align_si.sh
--boost-silence
1.25
--nj
4
--cmd
"
$train_cmd
"
\
$idata_kaldi
/data_15kshort
$idata_kaldi
/lang
$exp_mono
$exp_kaldi
/mono_ali15k_model10k
# # train a first delta + delta-delta triphone system on a subset of 70000 utterances
steps/train_deltas.sh
--boost-silence
1.25
--cmd
"
$train_cmd
"
\
2
000 1
0
000
$idata_kaldi
/
meeting_best_microsof
t
$idata_kaldi
/lang
$exp_
mono
$exp_kaldi
/tri1
_selected
3
000 1
5
000
$idata_kaldi
/
data_15kshor
t
$idata_kaldi
/lang
$exp_
kaldi
/mono_ali
$exp_kaldi
/tri1
steps/train_deltas.sh
--boost-silence
1.25
--cmd
"
$train_cmd
"
\
3000 15000
$idata_kaldi
/meeting_best_microsoft
$idata_kaldi
/lang
$exp_kaldi
/mono_ali
$exp_kaldi
/tri1_selected
steps/train_deltas.sh
--boost-silence
1.25
--cmd
"
$train_cmd
"
\
2000 10000
$idata_kaldi
/data_20kshort
$idata_kaldi
/lang_all exp-ESTER-all/mono_ali exp-ESTER-all/tri1_selected
# # decode using the tri1 model
# (
utils/mkgraph.sh
$idata_kaldi
/lang_test_tglarge
\
$exp_kaldi
/tri1_selected
$exp_kaldi
/tri1_selected/graph_tglarge
utils/mkgraph.sh
$idata_kaldi
/lang_test_french-small
\
$exp_kaldi
/tri1_selected
$exp_kaldi
/tri1_selected/graph_french-small
for
test
in
meeting_test
;
do
steps/decode.sh
--nj
2
--cmd
"
$decode_cmd
"
$exp_kaldi
/tri1_selected/graph_tglarge
\
$idata_kaldi
/
$test
$exp_kaldi
/tri1_selected/decode_tglarge_
$test
...
...
@@ -215,17 +249,20 @@ paste -d , exp-eval/Evaluation/final_evaluation.csv exp-eval/Evaluation/lm_tg100
done
# )&
steps/align_si.sh
--nj
5
--cmd
"
$train_cmd
"
\
data-valid/train_file data-valid/lang exp-valid/tri1 exp-valid/tri1_ali
# ================== Transformation LDA+MLLT ============
utils/subset_data_dir.sh
--shortest
$idata_kaldi
/data 27000
$idata_kaldi
/data_27k
steps/align_si.sh
--nj
4
--cmd
"
$train_cmd
"
\
$idata_kaldi
/data_20k
$idata_kaldi
/lang
$exp_kaldi
/tri1
$exp_kaldi
/tri1_ali
# # train an LDA+MLLT system.
steps/train_lda_mllt.sh
--cmd
"
$train_cmd
"
\
--splice-opts
"--left-context=3 --right-context=3"
2500 15000
\
$idata_kaldi
/
meeting_best_microsoft
$idata_kaldi
/lang
$exp_kaldi
/tri1_selected
$exp_kaldi
/tri2b_selected
$idata_kaldi
/
data_27k
$idata_kaldi
/lang
$exp_kaldi
/tri1_selected
$exp_kaldi
/tri2b_selected
# # decode using the LDA+MLLT model
(
#
(
utils/mkgraph.sh
$idata_kaldi
/lang_test_tglarge
\
$exp_kaldi
/tri2b_selected
$exp_kaldi
/tri2b_selected/graph_tglarge
for
test
in
meeting_test
;
do
...
...
@@ -240,22 +277,26 @@ paste -d , exp-eval/Evaluation/final_evaluation.csv exp-eval/Evaluation/lm_tg100
# )&
# # Align a 10k utts subset using the tri2b model
# steps/align_si.sh --nj 10 --cmd "$train_cmd" --use-graphs true \
# data/train_10k data/lang_nosp exp/tri2b exp/tri2b_ali_10k
utils/subset_data_dir.sh
--shortest
$idata_kaldi
/data 31000
$idata_kaldi
/data_31k
steps/align_si.sh
--nj
4
--cmd
"
$train_cmd
"
--use-graphs
true
\
$idata_kaldi
/data_31k
$idata_kaldi
/lang
$exp_kaldi
/tri2b_selected
$exp_kaldi
/tri2b_ali
# # Train tri3b, which is LDA+MLLT+SAT on 10k utts
# steps/train_sat.sh --cmd "$train_cmd" 2500 15000 \
# data/train_10k data/lang_nosp exp/tri2b_ali_10k exp/tri3b
steps/train_sat.sh
--cmd
"
$train_cmd
"
2500 15000
\
$idata_kaldi
/meeting_best_microsoft
$idata_kaldi
/lang
$exp_kaldi
/tri2b_selected
$exp_kaldi
/tri3b
steps/train_sat.sh
--cmd
"
$train_cmd
"
4500 45000
\
$idata_kaldi
/data_31k
$idata_kaldi
/lang
$exp_kaldi
/tri2b_ali
$exp_kaldi
/tri3b
steps/train_sat.sh
--cmd
"
$train_cmd
"
2500 15000
\
data-microsoft-mfcc/meeting_best_microsoft data-ESTER/lang_new exp-ESTER/tri3b exp-ESTER/tri3b_bis
# # decode using the tri3b model
# (
utils/mkgraph.sh
$idata_kaldi
/lang_test_
tglarge
\
$exp_kaldi
/tri3b
$exp_kaldi
/tri3b/graph_test_
tglarge
utils/mkgraph.sh
$idata_kaldi
/lang_test_
french-small
\
$exp_kaldi
/tri3b
$exp_kaldi
/tri3b/graph_test_
french-small
for
test
in
meeting_test
;
do
steps/decode_fmllr.sh
--nj
2
--cmd
"
$decode_cmd
"
\
$exp_kaldi
/tri3b/graph_test_
tg
small
$idata_kaldi
/
$test
\
$exp_kaldi
/tri3b/decode_tg
large
_
$test
$exp_kaldi
/tri3b/graph_test_
french-
small
$idata_kaldi
/
$test
\
$exp_kaldi
/tri3b/decode_tg
sphinx
_
$test
# steps/lmrescore.sh --cmd "$decode_cmd" data/lang_nosp_test_{tgsmall,tgmed} \
# data/$test exp/tri3b/decode_nosp_{tgsmall,tgmed}_$test
# steps/lmrescore_const_arpa.sh \
...
...
@@ -297,14 +338,14 @@ paste -d , exp-eval/Evaluation/final_evaluation.csv exp-eval/Evaluation/lm_tg100
# # and re-create the lang directory.
# à comprendre
steps/get_prons.sh
--cmd
"
$train_cmd
"
\
$idata_kaldi
/
meeting_best_microsoft
$idata_kaldi
/lang
$exp_kaldi
/tri3b
$idata_kaldi
/
data_31k
$idata_kaldi
/lang
$exp_kaldi
/tri3b
utils/dict_dir_add_pronprobs.sh
--max-normalize
true
\
$idata_kaldi
/local/dict
\
$exp_kaldi
/tri3b/pron_counts_nowb.txt
$exp_kaldi
/tri3b/sil_counts_nowb.txt
\
$exp_kaldi
/tri3b/pron_bigram_counts_nowb.txt
$idata_kaldi
/local/dict_new
utils/prepare_lang.sh
$idata_kaldi
/local/dict_new
\
"<
UNK
>"
$idata_kaldi
/local/lang_tmp_new
$idata_kaldi
/lang_new
"<
unk
>"
$idata_kaldi
/local/lang_tmp_new
$idata_kaldi
/lang_new
local
/format_lms.sh
--src-dir
$idata_kaldi
/lang_new
$idata_kaldi
/local/lm
# utils/build_const_arpa_lm.sh \
...
...
@@ -319,11 +360,11 @@ paste -d , exp-eval/Evaluation/final_evaluation.csv exp-eval/Evaluation/lm_tg100
#utils/mkgraph.sh \
# $idata_kaldi/lang_new_test_french-small $exp_kaldi/tri3b $exp_kaldi/tri3b/graph_french-small
utils/mkgraph.sh
\
$idata_kaldi
/lang_new_test_
tgmix
$exp_kaldi
/tri3b
$exp_kaldi
/tri3b/graph_
tgmix
$idata_kaldi
/lang_new_test_
french-small
$exp_kaldi
/tri3b
$exp_kaldi
/tri3b/graph_
french-small
for
test
in
meeting_test
;
do
steps/decode_fmllr.sh
--nj
2
--cmd
"
$decode_cmd
"
\
$exp_kaldi
/tri3b/graph_
tgmix
$idata_kaldi
/
$tes
t
\
$exp_kaldi
/tri3b/decode_lang_new_
tgmix_
$tes
t
steps/decode_fmllr.sh
--nj
4
--cmd
"
$decode_cmd
"
\
$exp_kaldi
/tri3b/graph_
french-small
$idata_kaldi
/
data_10kshor
t
\
$exp_kaldi
/tri3b/decode_lang_new_
french-small_data_10kshor
t
# steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \
# data/$test exp/tri4b/decode_{tgsmall,tgmed}_$test
# steps/lmrescore_const_arpa.sh \
...
...
@@ -355,10 +396,10 @@ paste -d , exp-eval/Evaluation/final_evaluation.csv exp-eval/Evaluation/lm_tg100
--minibatch-size
"
$minibatch_size
"
\
--num-jobs-nnet
4
--mix-up
8000
\
--initial-learning-rate
0.01
--final-learning-rate
0.001
\
--num-hidden-layers
4
\
--num-hidden-layers
3
\
--pnorm-input-dim
2000
--pnorm-output-dim
400
\
--cmd
"
$decode_cmd
"
\
$idata_kaldi
/
meeting_best_microsoft
$idata_kaldi
/lang_new
$exp_kaldi
/tri3b
$exp_kaldi
/nn2
$idata_kaldi
/
data
$idata_kaldi
/lang_new
$exp_kaldi
/tri3b
$exp_kaldi
/nn2
for
test
in
meeting_test
;
do
...
...
@@ -394,19 +435,36 @@ done
# # ... and then combine the two sets into a 460 hour one
# utils/combine_data.sh \
# data/train_clean_460 data/train_clean_100 data/train_clean_360
# Phase1(30H) & Phase 2(50H): 90H
utils/combine_data.sh
\
$idata_kaldi
/DATA_1_2
$idata_kaldi
/data
$idata_kaldi
/DATA2
# # align the new, combined set, using the tri4b model
# steps/align_fmllr.sh --nj 40 --cmd "$train_cmd" \
# data/train_clean_460 data/lang exp/tri4b exp/tri4b_ali_clean_460
steps/align_fmllr.sh
--nj
4
--cmd
"
$train_cmd
"
\
$idata_kaldi
/DATA_1_2
$idata_kaldi
/lang_new
$exp_kaldi
/tri3b
$exp_kaldi
/tri3b_ali_90
# # create a larger SAT model, trained on the 460 hours of data.
# steps/train_sat.sh --cmd "$train_cmd" 5000 100000 \
# data/train_clean_460 data/lang exp/tri4b_ali_clean_460 exp/tri5b
steps/train_sat.sh
--cmd
"
$train_cmd
"
5000 100000
\
$idata_kaldi
/DATA_1_2
$idata_kaldi
/lang_new
$exp_kaldi
/tri3b_ali_90
$exp_kaldi
/tri5b
steps/train_sat.sh
--cmd
"
$train_cmd
"
7000 150000
\
$idata_kaldi
/DATA_1_2
$idata_kaldi
/lang_new
$exp_kaldi
/tri3b_ali_90
$exp_kaldi
/tri5b
# # decode using the tri5b model
# (
# utils/mkgraph.sh data/lang_test_tgsmall \
# exp/tri5b exp/tri5b/graph_tgsmall
utils/mkgraph.sh
$idata_kaldi
/lang_new_test_french-small
\
$exp_kaldi
/tri5b
$exp_kaldi
/tri5b/graph_tgsmall
steps/decode_fmllr.sh
--nj
4
--cmd
"
$decode_cmd
"
\
$exp_kaldi
/tri5b/graph_tgsmall
$idata_kaldi
/DATA
\
$exp_kaldi
/tri5b/decode_french-small_EVAL2005
# for test in test_clean test_other dev_clean dev_other; do
# steps/decode_fmllr.sh --nj 20 --cmd "$decode_cmd" \
# exp/tri5b/graph_tgsmall data/$test \
...
...
@@ -424,7 +482,24 @@ done
# # train a NN model on the 460 hour set
# local/nnet2/run_6a_clean_460.sh
#local/nnet2/run_6a_clean_460.sh
num_threads
=
4
parallel_opts
=
"-pe smp
$num_threads
"
minibatch_size
=
128
steps/nnet2/train_pnorm_fast.sh
--stage
-10
\
--samples-per-iter
400000
\
--num-epochs
7
--num-epochs-extra
3
\
--parallel-opts
"
$parallel_opts
"
\
--num-threads
"
$num_threads
"
\
--minibatch-size
"
$minibatch_size
"
\
--num-jobs-nnet
4
--mix-up
10000
\
--initial-learning-rate
0.01
--final-learning-rate
0.001
\
--num-hidden-layers
4
\
--pnorm-input-dim
4000
--pnorm-output-dim
400
\
--cmd
"
$decode_cmd
"
\
$idata_kaldi
/DATA_1_2
$idata_kaldi
/lang_new
$exp_kaldi
/tri5b
$exp_kaldi
/nn90
# local/download_and_untar.sh $data $data_url train-other-500
# # prepare the 500 hour subset.
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment