Commit f20a373a authored by Jan Trmal's avatar Jan Trmal
Browse files

Finalizing the released recipe for OP1

git-svn-id: https://svn.code.sf.net/p/kaldi/code/trunk@4341 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
parent af7e69f2
......@@ -11,11 +11,31 @@ a) Preparation: you need to make sure the BABEL data and the F4DE scoring softwa
b) If you plan to work on one or more languages, the following approach is advised.
aa) create an empty directory in a location of your choice
ab) symlink all the directories here to that directory
ac) copy cmd.sh and path.sh (you will probably need to do some changes in these)
(
mkdir 206-zulu-llp; cd 206-zulu-llp
)
ab) copy cmd.sh and path.sh (you will probably need to make some changes to these);
pay particular attention to KALDI_ROOT in path.sh, and consider switching to
run.pl in cmd.sh
(
cp /path/to/kaldi/egs/babel/s5b/{cmd.sh,path.sh} .
)
ac) symlink all the directories here to that directory
(
ln -s /path/to/kaldi/egs/babel/s5b/{conf,steps,utils,local} .
)
ad) link the necessary scripts (see below)
(
ln -s /path/to/kaldi/egs/babel/s5b/run-1-main.sh .
)
ae) link the appropriate language-specific config file to lang.conf in
each directory.
(
206-zulu-llp$ ln -s conf/lang/206-zulu-limitedLP.official.conf lang.conf
)
Running the training scripts
===================================================
......@@ -42,3 +62,21 @@ determine the path inside the test.uem dataset.
./run-1-main.sh
./run-2a-nnet-ensemble-gpu.sh
./run-2b-bnf.sh --semisupervised false --ali-dir exp/tri5_ali/
./run-3b-bnf-sgmm.sh --semisupervised false
./run-3b-bnf-nnet.sh --semisupervised false
./run-2-segmentation.sh
./run-4-anydecode.sh --dir dev2h.seg
./run-4b-anydecode-bnf.sh --dir dev2h.seg --semisupervised false --extra-kws true
./run-4-anydecode.sh --dir unsup.seg --skip-kws true --skip-stt true
./run-4b-anydecode-bnf.sh --dir unsup.seg --skip-kws true --skip-stt true --semisupervised false
This diff is collapsed.
......@@ -13,7 +13,11 @@ dev2h_data_cmudb=/export/babel/data/splits/Cantonese_Babel101/uem/db-v8-utt.dat
dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev/IARPA-babel101b-v0.4c_conv-dev.stm
dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev.ecf.xml
dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev/IARPA-babel101b-v0.4c_conv-dev.mitllfa2.rttm
dev2h_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev.kwlist.xml
dev2h_kwlist_file=/export/babel/data/splits/Cantonese_Babel101/babel101b-v0.4c_conv-dev.kwlist.xml
dev2h_more_kwlists=(
[dev]=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev.kwlist.xml
[eval]=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev.kwlist2.xml
)
dev2h_subset_ecf=true
dev2h_nj=20
......@@ -24,7 +28,11 @@ dev10h_data_cmudb=/export/babel/data/splits/Cantonese_Babel101/uem/db-v8-utt.dat
dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev/IARPA-babel101b-v0.4c_conv-dev.stm
dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev.ecf.xml
dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev/IARPA-babel101b-v0.4c_conv-dev.mitllfa2.rttm
dev10h_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev.kwlist.xml
dev10h_kwlist_file=/export/babel/data/splits/Cantonese_Babel101/babel101b-v0.4c_conv-dev.kwlist.xml
dev10h_more_kwlists=(
[dev]=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev.kwlist.xml
[eval]=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev.kwlist2.xml
)
dev10h_nj=32
......@@ -36,15 +44,25 @@ eval_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-eval
eval_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-eval.kwlist.xml
eval_nj=64
#Official (POST-)EVAL evaluation data portion
evalpart1_data_dir=/export/babel/data/101-cantonese/release-current/conversational/eval
evalpart1_data_list=/export/babel/data/splits/Cantonese_Babel101/evalpart1.babel101b-v0.4c.list
evalpart1_data_cmudb=/export/babel/data/splits/Cantonese_Babel101/uem/db-v8-utt.dat
evalpart1_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-evalpart1/IARPA-babel101b-v0.4c_conv-evalpart1.stm
evalpart1_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-evalpart1.ecf.xml
evalpart1_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-evalpart1/IARPA-babel101b-v0.4c_conv-evalpart1.mitllfa3.rttm
evalpart1_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-evalpart1/IARPA-babel101b-v0.4c_conv-evalpart1.annot.kwlist.xml
evalpart1_nj=21
#Shadow data files
shadow_data_dir=(
/export/babel/data/101-cantonese/release-current/conversational/dev
/export/babel/data/101-cantonese/release-current/conversational/eval
)
shadow_data_cmudb=/export/babel/data/splits/Cantonese_Babel101/uem/db-v8-dev+eval.utt.dat
shadow_data_list=(
/export/babel/data/splits/Cantonese_Babel101/dev.list
/export/babel/data/splits/Cantonese_Babel101/eval.babel101b-v0.4c.list
)
shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev.ecf.xml
shadow_kwlist_file=/export/babel/data/splits/Cantonese_Babel101/babel101b-v0.4c_conv-dev.kwlist.xml
# Additional keyword lists for the Cantonese (babel101b) shadow set, keyed by
# a short label. They must be the Cantonese lists, i.e. the same files the
# dev2h/dev10h *_more_kwlists entries of this config reference, not the lists
# of another language pack.
# NOTE(review): string subscripts only act as distinct keys if the array has
# been declared associative (declare -A) before this file is sourced --
# confirm in the calling scripts.
shadow_more_kwlists=(
[dev]=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev.kwlist.xml
[eval]=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev.kwlist2.xml
)
shadow_nj=64
# Acoustic model parameters
numLeavesTri1=1000
......@@ -61,13 +79,12 @@ numGaussUBM=800
numLeavesSGMM=10000
numGaussSGMM=80000
# Lexicon and Language Model parameters
oovSymbol="<unk>"
lexiconFlags="--romanized --oov <unk>"
# Scoring protocols (dummy GLM file to appease the scoring script)
#glmFile=/export/babel/data/splits/Cantonese_Babel101/cantonese.glm
glmFile=/export/babel/data/splits/Cantonese_Babel101/cantonese.glm
lexicon_file=/export/babel/data/101-cantonese/release-current/conversational/reference_materials/lexicon.txt
cer=1
......@@ -77,4 +94,3 @@ word_ins_penalty=0.5
#keyword search settings
duptime=0.5
case_insensitive=true
......@@ -13,21 +13,14 @@ dev2h_data_cmudb=/export/babel/data/splits/Cantonese_Babel101/uem/db-v8-utt.dat
dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev/IARPA-babel101b-v0.4c_conv-dev.stm
dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev.ecf.xml
dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev/IARPA-babel101b-v0.4c_conv-dev.mitllfa2.rttm
dev2h_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev.kwlist.xml
dev2h_kwlist_file=/export/babel/data/splits/Cantonese_Babel101/babel101b-v0.4c_conv-dev.kwlist.xml
dev2h_more_kwlists=(
[dev]=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev.kwlist.xml
[eval]=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev.kwlist2.xml
)
dev2h_subset_ecf=true
dev2h_nj=20
#RADICAL DEV8H data files
dev8h_data_dir=/export/babel/data/101-cantonese/release-current/conversational/dev
dev8h_data_list=/export/babel/data/splits/Cantonese_Babel101/dev.7hr.list
dev8h_data_cmudb=/export/babel/data/splits/Cantonese_Babel101/uem/db-v8-utt.dat
dev8h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev/IARPA-babel101b-v0.4c_conv-dev.stm
dev8h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev.ecf.xml
dev8h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev/IARPA-babel101b-v0.4c_conv-dev.mitllfa2.rttm
dev8h_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev.kwlist.xml
dev8h_subset_ecf=true
dev8h_nj=32
#Official DEV data files
dev10h_data_dir=/export/babel/data/101-cantonese/release-current/conversational/dev
dev10h_data_list=/export/babel/data/splits/Cantonese_Babel101/dev.list
......@@ -35,7 +28,11 @@ dev10h_data_cmudb=/export/babel/data/splits/Cantonese_Babel101/uem/db-v8-utt.dat
dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev/IARPA-babel101b-v0.4c_conv-dev.stm
dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev.ecf.xml
dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev/IARPA-babel101b-v0.4c_conv-dev.mitllfa2.rttm
dev10h_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev.kwlist.xml
dev10h_kwlist_file=/export/babel/data/splits/Cantonese_Babel101/babel101b-v0.4c_conv-dev.kwlist.xml
dev10h_more_kwlists=(
[dev]=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev.kwlist.xml
[eval]=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev.kwlist2.xml
)
dev10h_nj=32
......@@ -47,15 +44,33 @@ eval_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-eval
eval_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-eval.kwlist.xml
eval_nj=64
#Official (POST-)EVAL evaluation data portion
evalpart1_data_dir=/export/babel/data/101-cantonese/release-current/conversational/eval
evalpart1_data_list=/export/babel/data/splits/Cantonese_Babel101/evalpart1.babel101b-v0.4c.list
evalpart1_data_cmudb=/export/babel/data/splits/Cantonese_Babel101/uem/db-v8-utt.dat
evalpart1_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-evalpart1/IARPA-babel101b-v0.4c_conv-evalpart1.stm
evalpart1_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-evalpart1.ecf.xml
evalpart1_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-evalpart1/IARPA-babel101b-v0.4c_conv-evalpart1.mitllfa3.rttm
evalpart1_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-evalpart1/IARPA-babel101b-v0.4c_conv-evalpart1.annot.kwlist.xml
evalpart1_nj=21
#Shadow data files
shadow_data_dir=(
/export/babel/data/101-cantonese/release-current/conversational/dev
/export/babel/data/101-cantonese/release-current/conversational/eval
)
shadow_data_cmudb=/export/babel/data/splits/Cantonese_Babel101/uem/db-v8-dev+eval.utt.dat
shadow_data_list=(
/export/babel/data/splits/Cantonese_Babel101/dev.list
/export/babel/data/splits/Cantonese_Babel101/eval.babel101b-v0.4c.list
)
shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev.ecf.xml
shadow_kwlist_file=/export/babel/data/splits/Cantonese_Babel101/babel101b-v0.4c_conv-dev.kwlist.xml
# Additional keyword lists for the Cantonese (babel101b) shadow set, keyed by
# a short label. They must be the Cantonese lists, i.e. the same files the
# dev2h/dev10h *_more_kwlists entries of this config reference, not the lists
# of another language pack.
# NOTE(review): string subscripts only act as distinct keys if the array has
# been declared associative (declare -A) before this file is sourced --
# confirm in the calling scripts.
shadow_more_kwlists=(
[dev]=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev.kwlist.xml
[eval]=/export/babel/data/scoring/IndusDB/IARPA-babel101b-v0.4c_conv-dev.kwlist2.xml
)
shadow_nj=64
# Untranscribed (unsupervised) Cantonese LimitedLP data.
# Both entries must refer to the Cantonese (101) pack; pointing them at the
# Pashto (104) training data would feed audio of the wrong language into the
# unsup.seg decoding / semi-supervised steps.
# NOTE(review): the list file name follows the naming used by the other
# LimitedLP configs (train.LimitedLP.untranscribed.list) -- confirm that it
# exists under the Cantonese splits directory.
unsup_data_dir=(
/export/babel/data/101-cantonese/release-current/conversational/training/
)
unsup_data_list=(
/export/babel/data/splits/Cantonese_Babel101/train.LimitedLP.untranscribed.list
)
# Job count configured for the unsup data set.
unsup_nj=64
# Acoustic model parameters
numLeavesTri1=1000
......@@ -72,13 +87,12 @@ numGaussUBM=750
numLeavesSGMM=5000
numGaussSGMM=18000
# Lexicon and Language Model parameters
oovSymbol="<unk>"
lexiconFlags="--romanized --oov <unk>"
# Scoring protocols (dummy GLM file to appease the scoring script)
#glmFile=/export/babel/data/splits/Cantonese_Babel101/cantonese.glm
glmFile=/export/babel/data/splits/Cantonese_Babel101/cantonese.glm
lexicon_file=/export/babel/data/101-cantonese/release-babel101b-v0.4c_sub-train1/conversational/reference_materials/lexicon.sub-train1.txt
cer=1
......@@ -88,4 +102,3 @@ word_ins_penalty=0.5
#keyword search settings
duptime=0.5
case_insensitive=true
......@@ -13,7 +13,11 @@ dev2h_data_cmudb=/export/babel/data/splits/Pashto_Babel104/uem/db-v7_dev+eval-ut
dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev/IARPA-babel104b-v0.4bY_conv-dev.stm
dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-eval.ecf.xml
dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev/IARPA-babel104b-v0.4bY_conv-dev.mitllfa3.rttm
dev2h_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev/IARPA-babel104b-v0.4bY_conv-dev.kwlist2.xml
dev2h_kwlist_file=/export/babel/data/splits/Pashto_Babel104/babel104b-v0.4bY_conv-dev.kwlist.xml
dev2h_more_kwlists=(
[dev]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.kwlist.xml
[eval]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.kwlist2.xml
)
dev2h_subset_ecf=true
dev2h_nj=18
......@@ -24,7 +28,11 @@ dev10h_data_cmudb=/export/babel/data/splits/Pashto_Babel104/uem/db-v7_dev+eval-u
dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev/IARPA-babel104b-v0.4bY_conv-dev.stm
dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-eval.ecf.xml
dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev/IARPA-babel104b-v0.4bY_conv-dev.mitllfa3.rttm
dev10h_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev/IARPA-babel104b-v0.4bY_conv-dev.kwlist2.xml
dev10h_kwlist_file=/export/babel/data/splits/Pashto_Babel104/babel104b-v0.4bY_conv-dev.kwlist.xml
dev10h_more_kwlists=(
[dev]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.kwlist.xml
[eval]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.kwlist2.xml
)
dev10h_nj=32
......@@ -46,6 +54,26 @@ eval_nj=64
#evalpart1_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-evalpart1/IARPA-babel104b-v0.4bY_conv-evalpart1.annot.kwlist2.xml
#evalpart1_nj=32
#Shadow data files
shadow_data_dir=(
/export/babel/data/104-pashto/release-current/conversational/dev
/export/babel/data/104-pashto/release-current/conversational/eval
)
shadow_data_cmudb=/export/babel/data/splits/Pashto_Babel104/uem/db-v7_dev+eval-utt.dat
shadow_data_list=(
/export/babel/data/splits/Pashto_Babel104/dev.list
/export/babel/data/splits/Pashto_Babel104/eval.babel104b-v0.4bY.list
)
shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.ecf.xml
shadow_kwlist_file=/export/babel/data/splits/Pashto_Babel104/babel104b-v0.4bY_conv-dev.kwlist.xml
shadow_more_kwlists=(
[dev]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.kwlist.xml
[eval]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.kwlist2.xml
)
shadow_nj=64
# Acoustic model parameters
numLeavesTri1=1000
numGaussTri1=10000
......@@ -73,4 +101,3 @@ lexicon_file=/export/babel/data/104-pashto/release-current/conversational/refere
#keyword search settings
duptime=0.5
case_insensitive=true
......@@ -13,7 +13,11 @@ dev2h_data_cmudb=/export/babel/data/splits/Pashto_Babel104/uem/db-v7_dev+eval-ut
dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev/IARPA-babel104b-v0.4bY_conv-dev.stm
dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-eval.ecf.xml
dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev/IARPA-babel104b-v0.4bY_conv-dev.mitllfa3.rttm
dev2h_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev/IARPA-babel104b-v0.4bY_conv-dev.kwlist2.xml
dev2h_kwlist_file=/export/babel/data/splits/Pashto_Babel104/babel104b-v0.4bY_conv-dev.kwlist.xml
dev2h_more_kwlists=(
[dev]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.kwlist.xml
[eval]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.kwlist2.xml
)
dev2h_subset_ecf=true
dev2h_nj=18
......@@ -24,7 +28,11 @@ dev10h_data_cmudb=/export/babel/data/splits/Pashto_Babel104/uem/db-v7_dev+eval-u
dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev/IARPA-babel104b-v0.4bY_conv-dev.stm
dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-eval.ecf.xml
dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev/IARPA-babel104b-v0.4bY_conv-dev.mitllfa3.rttm
dev10h_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev/IARPA-babel104b-v0.4bY_conv-dev.kwlist2.xml
dev10h_kwlist_file=/export/babel/data/splits/Pashto_Babel104/babel104b-v0.4bY_conv-dev.kwlist.xml
dev10h_more_kwlists=(
[dev]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.kwlist.xml
[eval]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.kwlist2.xml
)
dev10h_nj=32
......@@ -36,15 +44,33 @@ eval_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-eva
eval_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-eval.kwlist2.xml
eval_nj=64
#Official (POST-)EVAL evaluation data portion
#evalpart1_data_dir=/export/babel/data/104-pashto/release-current/conversational/eval
#evalpart1_data_list=
#evalpart1_data_cmudb=/export/babel/data/splits/Pashto_Babel104/uem/db-v7_dev+eval-utt.dat
#evalpart1_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-evalpart1/IARPA-babel104b-v0.4bY_conv-evalpart1.stm
#evalpart1_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-evalpart1.ecf.xml
#evalpart1_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-evalpart1/IARPA-babel104b-v0.4bY_conv-evalpart1.mitllfa3.rttm
#evalpart1_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-evalpart1/IARPA-babel104b-v0.4bY_conv-evalpart1.annot.kwlist2.xml
#evalpart1_nj=32
#Shadow data files
shadow_data_dir=(
/export/babel/data/104-pashto/release-current/conversational/dev
/export/babel/data/104-pashto/release-current/conversational/eval
)
shadow_data_cmudb=/export/babel/data/splits/Pashto_Babel104/uem/db-v7_dev+eval-utt.dat
shadow_data_list=(
/export/babel/data/splits/Pashto_Babel104/dev.list
/export/babel/data/splits/Pashto_Babel104/eval.babel104b-v0.4bY.list
)
shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.ecf.xml
shadow_kwlist_file=/export/babel/data/splits/Pashto_Babel104/babel104b-v0.4bY_conv-dev.kwlist.xml
shadow_more_kwlists=(
[dev]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.kwlist.xml
[eval]=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.kwlist2.xml
)
shadow_nj=64
unsup_data_dir=(
/export/babel/data/104-pashto/release-current/conversational/training/
)
unsup_data_list=(
/export/babel/data/splits/Pashto_Babel104/train.LimitedLP.untranscribed.list
)
unsup_nj=64
# Acoustic model parameters
numLeavesTri1=1000
......@@ -73,4 +99,3 @@ lexicon_file=/export/babel/data/104-pashto/release-current-subtrain/conversation
#keyword search settings
duptime=0.5
case_insensitive=true
......@@ -13,7 +13,11 @@ dev2h_data_cmudb=/export/babel/data/splits/Turkish_Babel105/uem/db-dev+eval-v7-u
dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev/IARPA-babel105b-v0.4_conv-dev.stm
dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev.ecf.xml
dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev/IARPA-babel105b-v0.4_conv-dev.mitllfa3.rttm
dev2h_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev.kwlist.xml
dev2h_kwlist_file=/export/babel/data/splits/Turkish_Babel105/babel105b-v0.4_conv-dev.kwlist.xml
dev2h_more_kwlists=(
[dev]=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev.kwlist.xml
[eval]=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev.kwlist2.xml
)
dev2h_subset_ecf=true
dev2h_nj=18
......@@ -24,7 +28,11 @@ dev10h_data_cmudb=/export/babel/data/splits/Turkish_Babel105/uem/db-dev+eval-v7-
dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev/IARPA-babel105b-v0.4_conv-dev.stm
dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev.ecf.xml
dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev/IARPA-babel105b-v0.4_conv-dev.mitllfa3.rttm
dev10h_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev.kwlist.xml
dev10h_kwlist_file=/export/babel/data/splits/Turkish_Babel105/babel105b-v0.4_conv-dev.kwlist.xml
dev10h_more_kwlists=(
[dev]=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev.kwlist.xml
[eval]=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev.kwlist2.xml
)
dev10h_nj=32
......@@ -46,6 +54,25 @@ evalpart1_rttm_file=
evalpart1_kwlist_file=
evalpart1_nj=21
#Shadow data files
shadow_data_dir=(
/export/babel/data/105-turkish/release-current-b/conversational/dev
/export/babel/data/105-turkish/release-current-b/conversational/eval
)
shadow_data_cmudb=/export/babel/data/splits/Turkish_Babel105/uem/db-dev+eval-v7-utt.dat
shadow_data_list=(
/export/babel/data/splits/Turkish_Babel105/dev.list
/export/babel/data/splits/Turkish_Babel105/eval.babel105b-v0.4.list
)
shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev.ecf.xml
shadow_kwlist_file=/export/babel/data/splits/Turkish_Babel105/babel105b-v0.4_conv-dev.kwlist.xml
shadow_more_kwlists=(
[dev]=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev.kwlist.xml
[eval]=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev.kwlist2.xml
)
shadow_nj=64
# Acoustic model parameters
numLeavesTri1=1000
numGaussTri1=10000
......@@ -68,9 +95,9 @@ lexiconFlags="--oov <unk>"
# Scoring protocols (dummy GLM file to appease the scoring script)
glmFile=./conf/glm
lexicon_file=/export/babel/data/105-turkish/release-current-b/conversational/reference_materials/lexicon.txt
#http://demo.icu-project.org/icu-bin/translit
icu_opt=(--use-icu true --icu-transform 'İ > i;I([^[:ccc=Not_Reordered:][:ccc=Above:]]*)̇ > i \\\\\\\\\\\\\\\$1 ;I > ı;::Any-Lower();' )
#icu_opt=(--use-icu true --icu-transform "'\\\\\\\\İ > i;I([^[:ccc=Not_Reordered:][:ccc=Above:]]*)̇ > i \\\\\\\\\\\\\\\$1 ;I > ı;::Any-Lower();'" )
#keyword search settings
duptime=0.5
case_insensitive=true
......@@ -13,7 +13,11 @@ dev2h_data_cmudb=/export/babel/data/splits/Turkish_Babel105/uem/db-dev+eval-v7-u
dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev/IARPA-babel105b-v0.4_conv-dev.stm
dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev.ecf.xml
dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev/IARPA-babel105b-v0.4_conv-dev.mitllfa3.rttm
dev2h_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev.kwlist.xml
dev2h_kwlist_file=/export/babel/data/splits/Turkish_Babel105/babel105b-v0.4_conv-dev.kwlist.xml
dev2h_more_kwlists=(
[dev]=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev.kwlist.xml
[eval]=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev.kwlist2.xml
)
dev2h_subset_ecf=true
dev2h_nj=18
......@@ -24,7 +28,11 @@ dev10h_data_cmudb=/export/babel/data/splits/Turkish_Babel105/uem/db-dev+eval-v7-
dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev/IARPA-babel105b-v0.4_conv-dev.stm
dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev.ecf.xml
dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev/IARPA-babel105b-v0.4_conv-dev.mitllfa3.rttm
dev10h_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev.kwlist.xml
dev10h_kwlist_file=/export/babel/data/splits/Turkish_Babel105/babel105b-v0.4_conv-dev.kwlist.xml
dev10h_more_kwlists=(
[dev]=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev.kwlist.xml
[eval]=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev.kwlist2.xml
)
dev10h_nj=32
......@@ -36,15 +44,33 @@ eval_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-eval.
eval_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-eval.kwlist2.xml
eval_nj=64
#Official (POST-)EVAL evaluation data portion
evalpart1_data_dir=
evalpart1_data_list=
evalpart1_data_cmudb=
evalpart1_stm_file=
evalpart1_ecf_file=
evalpart1_rttm_file=
evalpart1_kwlist_file=
evalpart1_nj=21
#Shadow data files
shadow_data_dir=(
/export/babel/data/105-turkish/release-current-b/conversational/dev
/export/babel/data/105-turkish/release-current-b/conversational/eval
)
shadow_data_cmudb=/export/babel/data/splits/Turkish_Babel105/uem/db-dev+eval-v7-utt.dat
shadow_data_list=(
/export/babel/data/splits/Turkish_Babel105/dev.list
/export/babel/data/splits/Turkish_Babel105/eval.babel105b-v0.4.list
)
shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev.ecf.xml
shadow_kwlist_file=/export/babel/data/splits/Turkish_Babel105/babel105b-v0.4_conv-dev.kwlist.xml
shadow_more_kwlists=(
[dev]=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev.kwlist.xml
[eval]=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev.kwlist2.xml
)
shadow_nj=64
unsup_data_dir=(
/export/babel/data/105-turkish/release-current-b/conversational/training/
)
unsup_data_list=(
/export/babel/data/splits/Turkish_Babel105/train.LimitedLP.untranscribed.list
)
unsup_nj=64
# Acoustic model parameters
numLeavesTri1=1000
......@@ -74,4 +100,3 @@ icu_opt=(--use-icu true --icu-transform 'İ > i;I([^[:ccc=Not_Reordered:][:ccc=A
#keyword search settings
duptime=0.5
case_insensitive=true
......@@ -13,12 +13,11 @@ dev2h_data_cmudb=/export/babel/data/splits/Tagalog_Babel106/uem/v18/db-tag-utt.d
dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-dev/IARPA-babel106b-v0.2g_conv-dev.stm
dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-dev.ecf.xml
dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-dev/IARPA-babel106b-v0.2g_conv-dev.mitllfa3.rttm
dev2h_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-dev.kwlist.xml
dev2h_more_kwlists=( limitedLP=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-dev.kwlist.xml
fullLP=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-dev.kwlist.xml
radical=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-dev.kwlist.xml
eval=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-eval.kwlist2.xml
)
dev2h_kwlist_file=/export/babel/data/splits/Tagalog_Babel106/babel106b-v0.2g_conv-dev.kwlist.xml
dev2h_more_kwlists=(
[dev]=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-dev.kwlist.xml
[eval]=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-dev.kwlist2.xml
)
dev2h_subset_ecf=true
dev2h_nj=23
......@@ -30,15 +29,19 @@ dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-de
dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-dev.ecf.xml
dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-dev/IARPA-babel106b-v0.2g_conv-dev.mitllfa3.rttm
dev10h_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-dev.kwlist.xml
dev10h_more_kwlists=(
[dev]=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-dev.kwlist.xml
[eval]=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-dev.kwlist2.xml
)
dev10h_nj=32
#Official EVAL period evaluation data files
eval_data_dir=/export/babel/data/106-tagalog/release-current/conversational/eval
eval_data_list=/export/babel/data/splits/Tagalog_Babel106/eval.babel106b-v0.2g.list
eval_data_cmudb=/export/babel/data/splits/Tagalog_Babel106/uem/v18/db-tag-utt.dat
eval_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-eval.ecf.xml
eval_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-eval.kwlist2.xml
eval_data_cmudb=/export/babel/data/splits/Tagalog_Babel106/uem/v18/db-tag-utt.dat
eval_nj=64
#Official (POST-)EVAL evaluation data portion
......@@ -51,6 +54,33 @@ evalpart1_rttm_file=
evalpart1_kwlist_file=
evalpart1_nj=21
#Shadow data files
# Tagalog (babel106b) shadow set: the dev and eval portions combined.
# shadow_data_dir and shadow_data_list are parallel arrays (dev first, then
# eval): each directory entry is paired with the list entry at the same index.
# Every shadow_data_dir entry must therefore be a data directory, not a list.
shadow_data_dir=(
/export/babel/data/106-tagalog/release-current/conversational/dev
/export/babel/data/106-tagalog/release-current/conversational/eval
)
shadow_data_cmudb=/export/babel/data/splits/Tagalog_Babel106/uem/db-tag-dev+eval-utt.dat
shadow_data_list=(
/export/babel/data/splits/Tagalog_Babel106/dev.list
/export/babel/data/splits/Tagalog_Babel106/eval.babel106b-v0.2g.list
)
# The ECF has to belong to the Tagalog pack (babel106b-v0.2g); the dev ECF is
# used here, matching the dev10h_ecf_file setting of this config.
# NOTE(review): confirm dev (rather than eval) is the intended ECF for the
# shadow set.
shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-dev.ecf.xml
shadow_kwlist_file=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-dev.kwlist.xml
# Additional keyword lists, keyed by a short label.
# NOTE(review): string subscripts only act as distinct keys if the array has
# been declared associative (declare -A) before this file is sourced --
# confirm in the calling scripts.
shadow_more_kwlists=(
[dev]=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-dev.kwlist.xml
[eval]=/export/babel/data/scoring/IndusDB/IARPA-babel106b-v0.2g_conv-dev.kwlist2.xml
)
# Job count configured for the shadow data set.
shadow_nj=64
unsup_data_dir=(
/export/babel/data/106-tagalog/release-current/conversational/training/
)
unsup_data_list=(
/export/babel/data/splits/Tagalog_Babel106/train.LimitedLP.untranscribed.list
)
unsup_nj=64
# Acoustic model parameters
numLeavesTri1=1000
numGaussTri1=10000
......@@ -77,4 +107,3 @@ lexicon_file=/export/babel/data/106-tagalog/release-current/conversational/refer