Commit f5dd4dd4 authored by Jan Trmal
Browse files

BABEL: major streamlining of the workflow (and thus higher code reuse),

improved configuration scheme


git-svn-id: https://svn.code.sf.net/p/kaldi/code/trunk@2772 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
parent 6c0f56be
......@@ -4,24 +4,48 @@
#speech corpora files location
train_data_dir=/export/babel/data/101-cantonese/release-current/conversational/training
train_data_list=/export/babel/data/splits/Cantonese_Babel101/train.FullLP.list
train_nj=32
#RADICAL DEV data files
dev2h_data_dir=/export/babel/data/101-cantonese/release-current/conversational/dev
dev2h_data_list=/export/babel/data/splits/Cantonese_Babel101/dev.3hr.list
dev2h_data_cmudb=/export/babel/data/splits/Cantonese_Babel101/uem/db-v8-utt.dat
dev2h_stm_file=/export/babel/data/scoring/IndusDB/babel101b-v0.4c_conv-dev/babel101b-v0.4c_conv-dev.stm
dev2h_ecf_file=/export/babel/data/scoring/IndusDB/babel101b-v0.4c_conv-dev.ecf.xml
dev2h_rttm_file=/export/babel/data/scoring/IndusDB/babel101b-v0.4c_conv-dev/babel101b-v0.4c_conv-dev.mitllfa2.rttm
dev2h_kwlist_file=/export/babel/data/scoring/IndusDB/babel101b-v0.4c_conv-dev.kwlist.xml
dev2h_subset_ecf=true
dev2h_nj=20
#Official DEV data files
dev10h_data_dir=/export/babel/data/101-cantonese/release-current/conversational/dev
dev10h_data_list=/export/babel/data/splits/Cantonese_Babel101/dev.list
dev10h_data_cmudb=/export/babel/data/splits/Cantonese_Babel101/uem/db-v8-utt.dat
dev10h_stm_file=/export/babel/data/scoring/IndusDB/babel101b-v0.4c_conv-dev/babel101b-v0.4c_conv-dev.stm
lexicon_file=/export/babel/data/101-cantonese/release-current/conversational/reference_materials/lexicon.txt
dev10h_ecf_file=/export/babel/data/scoring/IndusDB/babel101b-v0.4c_conv-dev.ecf.xml
dev10h_rttm_file=/export/babel/data/scoring/IndusDB/babel101b-v0.4c_conv-dev/babel101b-v0.4c_conv-dev.mitllfa2.rttm
dev10h_kwlist_file=/export/babel/data/scoring/IndusDB/babel101b-v0.4c_conv-dev.kwlist.xml
dev10h_nj=32
#Official EVAL period evaluation data files
eval_data_dir=/export/babel/data/101-cantonese/release-current/conversational/eval
eval_data_list=/export/babel/data/splits/Cantonese_Babel101/eval.babel101b-v0.4c.list
eval_data_ecf=/export/babel/data/scoring/IndusDB/babel101b-v0.4c_conv-eval.ecf.xml
eval_data_kwlist=/export/babel/data/scoring/IndusDB/babel101b-v0.4c_conv-eval.kwlist.xml
eval_data_cmudb=/export/babel/data/splits/Cantonese_Babel101/uem/db-v8-utt.dat
eval_ecf_file=/export/babel/data/scoring/IndusDB/babel101b-v0.4c_conv-eval.ecf.xml
eval_kwlist_file=/export/babel/data/scoring/IndusDB/babel101b-v0.4c_conv-eval.kwlist.xml
eval_nj=64
#Official (POST-)EVAL evaluation data portion
evalpart1_data_dir=/export/babel/data/101-cantonese/release-current/conversational/eval
evalpart1_data_list=/export/babel/data/splits/Cantonese_Babel101/evalpart1.babel101b-v0.4c.list
evalpart1_data_cmudb=/export/babel/data/splits/Cantonese_Babel101/uem/db-v8-utt.dat
evalpart1_stm_file=/export/babel/data/scoring/IndusDB/babel101b-v0.4c_conv-evalpart1/babel101b-v0.4c_conv-evalpart1.stm
evalpart1_ecf_file=/export/babel/data/scoring/IndusDB/babel101b-v0.4c_conv-evalpart1.ecf.xml
evalpart1_rttm_file=/export/babel/data/scoring/IndusDB/babel101b-v0.4c_conv-evalpart1/babel101b-v0.4c_conv-evalpart1.mitllfa3.rttm
evalpart1_kwlist_file=/export/babel/data/scoring/IndusDB/babel101b-v0.4c_conv-evalpart1/babel101b-v0.4c_conv-evalpart1.annot.kwlist.xml
evalpart1_nj=21
# Acoustic model parameters
numLeavesTri1=1000
numGaussTri1=10000
......@@ -37,31 +61,21 @@ numGaussUBM=800
numLeavesSGMM=10000
numGaussSGMM=80000
# Lexicon and Language Model parameters
oovSymbol="<unk>"
lexiconFlags="--romanized --oov <unk>"
use_pitch=true
# Scoring protocols (dummy GLM file to appease the scoring script)
glmFile=/export/babel/data/splits/Cantonese_Babel101/cantonese.glm
#glmFile=/export/babel/data/splits/Cantonese_Babel101/cantonese.glm
lexicon_file=/export/babel/data/101-cantonese/release-current/conversational/reference_materials/lexicon.txt
cer=1
train_nj=32
decode_nj=20
dev10h_nj=32
max_index_states=150000
word_ins_penalty=0.5
#keyword search settings
duptime=0.5
case_insensitive=true
ecf_file=/export/babel/data/scoring/IndusDB/babel101b-v0.4c_conv-dev.ecf.xml
#Generate a subset of the ecf file according to the {dev,eval}_data_list, if present
subset_ecf=true
kwlist_file=/export/babel/data/scoring/IndusDB/babel101b-v0.4c_conv-dev.kwlist.xml
rttm_file=/export/babel/data/scoring/IndusDB/babel101b-v0.4c_conv-dev/babel101b-v0.4c_conv-dev.mitllfa2.rttm
#rttm_file=/export/babel/data/scoring/IndusDB/babel101b-v0.4c_conv-dev/babel101b-v0.4c_conv-dev.mitllfa3.rttm
#Include the links and settings of the BABEL-only software
#This has been moved into path.sh
#. /export/babel/data/software/env.sh
......@@ -4,23 +4,48 @@
#speech corpora files location
train_data_dir=/export/babel/data/101-cantonese/release-current/conversational/training
train_data_list=/export/babel/data/splits/Cantonese_Babel101/train.LimitedLP.list
train_nj=16
#RADICAL DEV data files
dev2h_data_dir=/export/babel/data/101-cantonese/release-current/conversational/dev
dev2h_data_list=/export/babel/data/splits/Cantonese_Babel101/dev.3hr.list
dev2h_data_cmudb=/export/babel/data/splits/Cantonese_Babel101/uem/db-v8-utt.dat
dev2h_stm_file=/export/babel/data/scoring/IndusDB/babel101b-v0.4c_conv-dev/babel101b-v0.4c_conv-dev.stm
dev2h_ecf_file=/export/babel/data/scoring/IndusDB/babel101b-v0.4c_conv-dev.ecf.xml
dev2h_rttm_file=/export/babel/data/scoring/IndusDB/babel101b-v0.4c_conv-dev/babel101b-v0.4c_conv-dev.mitllfa2.rttm
dev2h_kwlist_file=/export/babel/data/scoring/IndusDB/babel101b-v0.4c_conv-dev.kwlist.xml
dev2h_subset_ecf=true
dev2h_nj=20
#Official DEV data files
dev10h_data_dir=/export/babel/data/101-cantonese/release-current/conversational/dev
dev10h_data_list=/export/babel/data/splits/Cantonese_Babel101/dev.list
lexicon_file=/export/babel/data/101-cantonese/release-babel101b-v0.4c_sub-train1/conversational/reference_materials/lexicon.sub-train1.txt
dev10h_data_cmudb=/export/babel/data/splits/Cantonese_Babel101/uem/db-v8-utt.dat
dev10h_stm_file=/export/babel/data/scoring/IndusDB/babel101b-v0.4c_conv-dev/babel101b-v0.4c_conv-dev.stm
dev10h_ecf_file=/export/babel/data/scoring/IndusDB/babel101b-v0.4c_conv-dev.ecf.xml
dev10h_rttm_file=/export/babel/data/scoring/IndusDB/babel101b-v0.4c_conv-dev/babel101b-v0.4c_conv-dev.mitllfa2.rttm
dev10h_kwlist_file=/export/babel/data/scoring/IndusDB/babel101b-v0.4c_conv-dev.kwlist.xml
dev10h_nj=32
#Official EVAL period evaluation data files
eval_data_dir=/export/babel/data/101-cantonese/release-current/conversational/eval
eval_data_list=/export/babel/data/splits/Cantonese_Babel101/eval.babel101b-v0.4c.list
eval_data_ecf=/export/babel/data/scoring/IndusDB/babel101b-v0.4c_conv-eval.ecf.xml
eval_data_kwlist=/export/babel/data/scoring/IndusDB/babel101b-v0.4c_conv-eval.kwlist.xml
eval_data_cmudb=/export/babel/data/splits/Cantonese_Babel101/uem/db-v8-utt.dat
eval_ecf_file=/export/babel/data/scoring/IndusDB/babel101b-v0.4c_conv-eval.ecf.xml
eval_kwlist_file=/export/babel/data/scoring/IndusDB/babel101b-v0.4c_conv-eval.kwlist.xml
eval_nj=64
#Official (POST-)EVAL evaluation data portion
evalpart1_data_dir=/export/babel/data/101-cantonese/release-current/conversational/eval
evalpart1_data_list=/export/babel/data/splits/Cantonese_Babel101/evalpart1.babel101b-v0.4c.list
evalpart1_data_cmudb=/export/babel/data/splits/Cantonese_Babel101/uem/db-v8-utt.dat
evalpart1_stm_file=/export/babel/data/scoring/IndusDB/babel101b-v0.4c_conv-evalpart1/babel101b-v0.4c_conv-evalpart1.stm
evalpart1_ecf_file=/export/babel/data/scoring/IndusDB/babel101b-v0.4c_conv-evalpart1.ecf.xml
evalpart1_rttm_file=/export/babel/data/scoring/IndusDB/babel101b-v0.4c_conv-evalpart1/babel101b-v0.4c_conv-evalpart1.mitllfa3.rttm
evalpart1_kwlist_file=/export/babel/data/scoring/IndusDB/babel101b-v0.4c_conv-evalpart1/babel101b-v0.4c_conv-evalpart1.annot.kwlist.xml
evalpart1_nj=21
# Acoustic model parameters
numLeavesTri1=1000
numGaussTri1=10000
......@@ -36,31 +61,21 @@ numGaussUBM=750
numLeavesSGMM=5000
numGaussSGMM=18000
# Lexicon and Language Model parameters
oovSymbol="<unk>"
lexiconFlags="--romanized --oov <unk>"
use_pitch=true
# Scoring protocols (dummy GLM file to appease the scoring script)
glmFile=/export/babel/data/splits/Cantonese_Babel101/cantonese.glm
#glmFile=/export/babel/data/splits/Cantonese_Babel101/cantonese.glm
lexicon_file=/export/babel/data/101-cantonese/release-babel101b-v0.4c_sub-train1/conversational/reference_materials/lexicon.sub-train1.txt
cer=1
train_nj=16
decode_nj=20
dev10h_nj=32
max_index_states=150000
word_ins_penalty=0.5
#keyword search settings
duptime=0.5
case_insensitive=true
ecf_file=/export/babel/data/scoring/IndusDB/babel101b-v0.4c_conv-dev.ecf.xml
#Generate a subset of the ecf file according to the {dev,eval}_data_list, if present
subset_ecf=true
kwlist_file=/export/babel/data/scoring/IndusDB/babel101b-v0.4c_conv-dev.kwlist.xml
rttm_file=/export/babel/data/scoring/IndusDB/babel101b-v0.4c_conv-dev/babel101b-v0.4c_conv-dev.mitllfa2.rttm
#rttm_file=/export/babel/data/scoring/IndusDB/babel101b-v0.4c_conv-dev/babel101b-v0.4c_conv-dev.mitllfa3.rttm
#Include the links and settings of the BABEL-only software
#This has been moved into path.sh
#. /export/babel/data/software/env.sh
......@@ -4,23 +4,37 @@
#speech corpora files location
train_data_dir=/export/babel/data/103-bengali/release-current/conversational/training
train_data_list=/export/babel/data/splits/Bengali_Babel103/train.FullLP.list
train_nj=32
#RADICAL DEV data files
dev2h_data_dir=/export/babel/data/103-bengali/release-current/conversational/dev
dev2h_data_list=/export/babel/data/splits/Bengali_Babel103/dev.2hr.list
dev2h_data_cmudb=
dev2h_stm_file=
dev2h_ecf_file=
dev2h_rttm_file=
dev2h_kwlist_file=
dev2h_subset_ecf=true
dev2h_nj=12
#Official DEV data files
dev10h_data_dir=/export/babel/data/103-bengali/release-current/conversational/dev
dev10h_data_list=/export/babel/data/splits/Bengali_Babel103/dev.list
dev10h_data_cmudb=
dev10h_stm_file=
lexicon_file=/export/babel/data/103-bengali/release-current/conversational/reference_materials/lexicon.txt
dev10h_ecf_file=
dev10h_rttm_file=
dev10h_kwlist_file=
dev10h_nj=32
#Official EVAL period evaluation data files
eval_data_dir=/export/babel/data/103-bengali/release-current/conversational/eval
#eval_data_list=/export/babel/data/splits/Bengali_Babel103/eval.babel101b-v0.4c.list
#eval_data_ecf=/export/babel/data/scoring/IndusDB/babel101b-v0.4c_conv-eval.ecf.xml
#eval_data_kwlist=/export/babel/data/scoring/IndusDB/babel101b-v0.4c_conv-eval.kwlist.xml
#eval_data_cmudb=/export/babel/data/splits/Bengali_Babel103/uem/db-v8-utt.dat
eval_nj=64
#Official EVAL period evaluation data files (not released yet)
#eval_data_dir=
#eval_data_list=
#eval_data_cmudb=
#eval_ecf_file=
#eval_kwlist_file=
#eval_nj=64
# Acoustic model parameters
numLeavesTri1=1000
......@@ -37,28 +51,19 @@ numGaussUBM=800
numLeavesSGMM=10000
numGaussSGMM=80000
# Lexicon and Language Model parameters
oovSymbol="<unk>"
lexiconFlags="--romanized --oov <unk>"
use_pitch=true
lexicon_file=/export/babel/data/103-bengali/release-current/conversational/reference_materials/lexicon.txt
cer=0
train_nj=32
decode_nj=12
dev10h_nj=32
#keyword search settings
duptime=0.5
case_insensitive=true
ecf_file=
#Generate a subset of the ecf file according to the {dev,eval}_data_list, if present
subset_ecf=true
kwlist_file=
rttm_file=
#Include the links and settings of the BABEL-only software
#This has been moved into path.sh
#. /export/babel/data/software/env.sh
......@@ -4,23 +4,37 @@
#speech corpora files location
train_data_dir=/export/babel/data/103-bengali/release-current/conversational/training
train_data_list=/export/babel/data/splits/Bengali_Babel103/train.LimitedLP.list
train_nj=16
#RADICAL DEV data files
dev2h_data_dir=/export/babel/data/103-bengali/release-current/conversational/dev
dev2h_data_list=/export/babel/data/splits/Bengali_Babel103/dev.2hr.list
dev2h_data_cmudb=
dev2h_stm_file=
dev2h_ecf_file=
dev2h_rttm_file=
dev2h_kwlist_file=
dev2h_subset_ecf=true
dev2h_nj=12
#Official DEV data files
dev10h_data_dir=/export/babel/data/103-bengali/release-current/conversational/dev
dev10h_data_list=/export/babel/data/splits/Bengali_Babel103/dev.list
dev10h_data_cmudb=
dev10h_stm_file=
lexicon_file=/export/babel/data/103-bengali/release-current/conversational/reference_materials/lexicon.sub-train.txt
dev10h_ecf_file=
dev10h_rttm_file=
dev10h_kwlist_file=
dev10h_nj=32
#Official EVAL period evaluation data files
eval_data_dir=/export/babel/data/103-bengali/release-current/conversational/eval
#eval_data_list=/export/babel/data/splits/Bengali_Babel103/eval.babel101b-v0.4c.list
#eval_data_ecf=/export/babel/data/scoring/IndusDB/babel101b-v0.4c_conv-eval.ecf.xml
#eval_data_kwlist=/export/babel/data/scoring/IndusDB/babel101b-v0.4c_conv-eval.kwlist.xml
#eval_data_cmudb=/export/babel/data/splits/Bengali_Babel103/uem/db-v8-utt.dat
eval_nj=64
#Official EVAL period evaluation data files (not released yet)
#eval_data_dir=
#eval_data_list=
#eval_data_cmudb=
#eval_ecf_file=
#eval_kwlist_file=
#eval_nj=64
# Acoustic model parameters
numLeavesTri1=1000
......@@ -37,28 +51,21 @@ numGaussUBM=750
numLeavesSGMM=5000
numGaussSGMM=18000
# Lexicon and Language Model parameters
oovSymbol="<unk>"
lexiconFlags="--romanized --oov <unk>"
use_pitch=true
lexicon_file=/export/babel/data/103-bengali/release-current/conversational/reference_materials/lexicon.sub-train.txt
cer=0
train_nj=16
decode_nj=12
dev10h_nj=32
max_index_states=150000
word_ins_penalty=0.5
#keyword search settings
duptime=0.5
case_insensitive=true
ecf_file=
#Generate a subset of the ecf file according to the {dev,eval}_data_list, if present
subset_ecf=true
kwlist_file=
rttm_file=
#Include the links and settings of the BABEL-only software
#This has been moved into path.sh
#. /export/babel/data/software/env.sh
......@@ -4,23 +4,48 @@
#speech corpora files location
train_data_dir=/export/babel/data/104-pashto/release-current/conversational/training
train_data_list=/export/babel/data/splits/Pashto_Babel104/train.FullLP.list
train_nj=32
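#RADICAL DEV2H data files
#NOTE(review): dev2h_ecf_file and dev10h_ecf_file in this file point at the conv-eval ECF, while the matching STM/RTTM/kwlist files are conv-dev -- confirm this is intended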
dev2h_data_dir=/export/babel/data/104-pashto/release-current/conversational/dev
dev2h_data_list=/export/babel/data/splits/Pashto_Babel104/dev2hr.list
dev2h_data_cmudb=/export/babel/data/splits/Pashto_Babel104/uem/db-v7_dev+eval-utt.dat
dev2h_stm_file=/export/babel/data/scoring/IndusDB/babel104b-v0.4bY_conv-dev/babel104b-v0.4bY_conv-dev.stm
dev2h_ecf_file=/export/babel/data/scoring/IndusDB/babel104b-v0.4bY_conv-eval.ecf.xml
dev2h_rttm_file=/export/babel/data/scoring/IndusDB/babel104b-v0.4bY_conv-dev/babel104b-v0.4bY_conv-dev.mitllfa3.rttm
dev2h_kwlist_file=/export/babel/data/scoring/IndusDB/babel104b-v0.4bY_conv-dev/babel104b-v0.4bY_conv-dev.kwlist2.xml
dev2h_subset_ecf=true
dev2h_nj=18
#Official DEV data files
dev10h_data_dir=/export/babel/data/104-pashto/release-current/conversational/dev
dev10h_data_list=/export/babel/data/splits/Pashto_Babel104/dev.list
lexicon_file=/export/babel/data/104-pashto/release-current/conversational/reference_materials/lexicon.txt
dev10h_data_cmudb=/export/babel/data/splits/Pashto_Babel104/uem/db-v7_dev+eval-utt.dat
dev10h_stm_file=/export/babel/data/scoring/IndusDB/babel104b-v0.4bY_conv-dev/babel104b-v0.4bY_conv-dev.stm
dev10h_ecf_file=/export/babel/data/scoring/IndusDB/babel104b-v0.4bY_conv-eval.ecf.xml
dev10h_rttm_file=/export/babel/data/scoring/IndusDB/babel104b-v0.4bY_conv-dev/babel104b-v0.4bY_conv-dev.mitllfa3.rttm
dev10h_kwlist_file=/export/babel/data/scoring/IndusDB/babel104b-v0.4bY_conv-dev/babel104b-v0.4bY_conv-dev.kwlist2.xml
dev10h_nj=32
#Official EVAL period evaluation data files
eval_data_dir=/export/babel/data/104-pashto/release-current/conversational/eval/
eval_data_list=/export/babel/data/splits/Pashto_Babel104/eval.babel104b-v0.4bY.list
eval_data_ecf=/export/babel/data/scoring/IndusDB/babel104b-v0.4bY_conv-eval.ecf.xml
eval_data_kwlist=/export/babel/data/scoring/IndusDB/babel104b-v0.4bY_conv-eval.kwlist2.xml
eval_data_cmudb=/export/babel/data/splits/Pashto_Babel104/uem/db-v7_dev+eval-utt.dat
eval_ecf_file=/export/babel/data/scoring/IndusDB/babel104b-v0.4bY_conv-eval.ecf.xml
eval_kwlist_file=/export/babel/data/scoring/IndusDB/babel104b-v0.4bY_conv-eval.kwlist2.xml
eval_nj=64
#Official (POST-)EVAL evaluation data portion
#evalpart1_data_dir=/export/babel/data/104-pashto/release-current/conversational/eval
#evalpart1_data_list=
#evalpart1_data_cmudb=/export/babel/data/splits/Pashto_Babel104/uem/db-v7_dev+eval-utt.dat
#evalpart1_stm_file=/export/babel/data/scoring/IndusDB/babel104b-v0.4bY_conv-evalpart1/babel104b-v0.4bY_conv-evalpart1.stm
#evalpart1_ecf_file=/export/babel/data/scoring/IndusDB/babel104b-v0.4bY_conv-evalpart1.ecf.xml
#evalpart1_rttm_file=/export/babel/data/scoring/IndusDB/babel104b-v0.4bY_conv-evalpart1/babel104b-v0.4bY_conv-evalpart1.mitllfa3.rttm
#evalpart1_kwlist_file=/export/babel/data/scoring/IndusDB/babel104b-v0.4bY_conv-evalpart1/babel104b-v0.4bY_conv-evalpart1.annot.kwlist2.xml
#evalpart1_nj=32
# Acoustic model parameters
numLeavesTri1=1000
numGaussTri1=10000
......@@ -41,22 +66,11 @@ oovSymbol="<unk>"
lexiconFlags="--romanized --oov <unk>"
# Scoring protocols (dummy GLM file to appease the scoring script)
glmFile=./conf/glm
#glmFile=./conf/glm
lexicon_file=/export/babel/data/104-pashto/release-current/conversational/reference_materials/lexicon.txt
train_nj=32
decode_nj=26
#keyword search settings
duptime=0.5
case_insensitive=false
ecf_file=/export/babel/data/scoring/IndusDB/babel104b-v0.4bY_conv-dev.ecf.xml
#Generate a subset of the ecf file according to the {dev,eval}_data_list, if present
subset_ecf=true
kwlist_file=/export/babel/data/scoring/IndusDB/babel104b-v0.4bY_conv-dev.kwlist.xml
rttm_file=/export/babel/data/scoring/IndusDB/babel104b-v0.4bY_conv-dev/babel104b-v0.4bY_conv-dev.mitllfa3.rttm
#Include the links and settings of the BABEL-only software
#This has been moved into path.sh
#. /export/babel/data/software/env.sh
case_insensitive=true
......@@ -4,23 +4,48 @@
#speech corpora files location
train_data_dir=/export/babel/data/104-pashto/release-current/conversational/training
train_data_list=/export/babel/data/splits/Pashto_Babel104/train.LimitedLP.list
train_nj=16
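#RADICAL DEV2H data files
#NOTE(review): dev2h_ecf_file and dev10h_ecf_file in this file point at the conv-eval ECF, while the matching STM/RTTM/kwlist files are conv-dev -- confirm this is intended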
dev2h_data_dir=/export/babel/data/104-pashto/release-current/conversational/dev
dev2h_data_list=/export/babel/data/splits/Pashto_Babel104/dev2hr.list
dev2h_data_cmudb=/export/babel/data/splits/Pashto_Babel104/uem/db-v7_dev+eval-utt.dat
dev2h_stm_file=/export/babel/data/scoring/IndusDB/babel104b-v0.4bY_conv-dev/babel104b-v0.4bY_conv-dev.stm
dev2h_ecf_file=/export/babel/data/scoring/IndusDB/babel104b-v0.4bY_conv-eval.ecf.xml
dev2h_rttm_file=/export/babel/data/scoring/IndusDB/babel104b-v0.4bY_conv-dev/babel104b-v0.4bY_conv-dev.mitllfa3.rttm
dev2h_kwlist_file=/export/babel/data/scoring/IndusDB/babel104b-v0.4bY_conv-dev/babel104b-v0.4bY_conv-dev.kwlist2.xml
dev2h_subset_ecf=true
dev2h_nj=18
#Official DEV data files
dev10h_data_dir=/export/babel/data/104-pashto/release-current/conversational/dev
dev10h_data_list=/export/babel/data/splits/Pashto_Babel104/dev.list
lexicon_file=/export/babel/data/104-pashto/release-current-subtrain/conversational/reference_materials/lexicon.sub-train.txt
dev10h_data_cmudb=/export/babel/data/splits/Pashto_Babel104/uem/db-v7_dev+eval-utt.dat
dev10h_stm_file=/export/babel/data/scoring/IndusDB/babel104b-v0.4bY_conv-dev/babel104b-v0.4bY_conv-dev.stm
dev10h_ecf_file=/export/babel/data/scoring/IndusDB/babel104b-v0.4bY_conv-eval.ecf.xml
dev10h_rttm_file=/export/babel/data/scoring/IndusDB/babel104b-v0.4bY_conv-dev/babel104b-v0.4bY_conv-dev.mitllfa3.rttm
dev10h_kwlist_file=/export/babel/data/scoring/IndusDB/babel104b-v0.4bY_conv-dev/babel104b-v0.4bY_conv-dev.kwlist2.xml
dev10h_nj=32
#Official EVAL period evaluation data files
eval_data_dir=/export/babel/data/104-pashto/release-current/conversational/eval/
eval_data_list=/export/babel/data/splits/Pashto_Babel104/eval.babel104b-v0.4bY.list
eval_data_ecf=/export/babel/data/scoring/IndusDB/babel104b-v0.4bY_conv-eval.ecf.xml
eval_data_kwlist=/export/babel/data/scoring/IndusDB/babel104b-v0.4bY_conv-eval.kwlist2.xml
eval_data_cmudb=/export/babel/data/splits/Pashto_Babel104/uem/db-v7_dev+eval-utt.dat
eval_ecf_file=/export/babel/data/scoring/IndusDB/babel104b-v0.4bY_conv-eval.ecf.xml
eval_kwlist_file=/export/babel/data/scoring/IndusDB/babel104b-v0.4bY_conv-eval.kwlist2.xml
eval_nj=64
#Official (POST-)EVAL evaluation data portion
#evalpart1_data_dir=/export/babel/data/104-pashto/release-current/conversational/eval
#evalpart1_data_list=
#evalpart1_data_cmudb=/export/babel/data/splits/Pashto_Babel104/uem/db-v7_dev+eval-utt.dat
#evalpart1_stm_file=/export/babel/data/scoring/IndusDB/babel104b-v0.4bY_conv-evalpart1/babel104b-v0.4bY_conv-evalpart1.stm
#evalpart1_ecf_file=/export/babel/data/scoring/IndusDB/babel104b-v0.4bY_conv-evalpart1.ecf.xml
#evalpart1_rttm_file=/export/babel/data/scoring/IndusDB/babel104b-v0.4bY_conv-evalpart1/babel104b-v0.4bY_conv-evalpart1.mitllfa3.rttm
#evalpart1_kwlist_file=/export/babel/data/scoring/IndusDB/babel104b-v0.4bY_conv-evalpart1/babel104b-v0.4bY_conv-evalpart1.annot.kwlist2.xml
#evalpart1_nj=32
# Acoustic model parameters
numLeavesTri1=1000
numGaussTri1=10000
......@@ -41,23 +66,11 @@ oovSymbol="<unk>"
lexiconFlags="--romanized --oov <unk>"
# Scoring protocols (dummy GLM file to appease the scoring script)
glmFile=./conf/glm
#glmFile=./conf/glm
lexicon_file=/export/babel/data/104-pashto/release-current-subtrain/conversational/reference_materials/lexicon.sub-train.txt
train_nj=16
decode_nj=18
#keyword search settings
duptime=0.5
case_insensitive=false
ecf_file=/export/babel/data/scoring/IndusDB/babel104b-v0.4bY_conv-dev.ecf.xml
#Generate a subset of the ecf file according to the {dev,eval}_data_list, if present
subset_ecf=true
kwlist_file=/export/babel/data/scoring/IndusDB/babel104b-v0.4bY_conv-dev.kwlist.xml
#rttm_file=/export/babel/data/scoring/IndusDB/babel104b-v0.4bY_conv-dev/babel104b-v0.4bY_conv-dev.rttm
rttm_file=/export/babel/data/scoring/IndusDB/babel104b-v0.4bY_conv-dev/babel104b-v0.4bY_conv-dev.mitllfa3.rttm
#Include the links and settings of the BABEL-only software
#This has been moved into path.sh
#. /export/babel/data/software/env.sh
case_insensitive=true
......@@ -4,23 +4,48 @@
#speech corpora files location
train_data_dir=/export/babel/data/105-turkish/release-current-b/conversational/training
train_data_list=/export/babel/data/splits/Turkish_Babel105/train.FullLP.list
train_nj=32
#RADICAL DEV data files
dev2h_data_dir=/export/babel/data/105-turkish/release-current-b/conversational/dev
dev2h_data_list=/export/babel/data/splits/Turkish_Babel105/dev2hr.list
dev2h_data_cmudb=/export/babel/data/splits/Turkish_Babel105/uem/db-dev+eval-v7-utt.dat
dev2h_stm_file=/export/babel/data/scoring/IndusDB/babel105b-v0.4_conv-dev/babel105b-v0.4_conv-dev.stm
dev2h_ecf_file=/export/babel/data/scoring/IndusDB/babel105b-v0.4_conv-dev.ecf.xml
dev2h_rttm_file=/export/babel/data/scoring/IndusDB/babel105b-v0.4_conv-dev/babel105b-v0.4_conv-dev.mitllfa3.rttm
dev2h_kwlist_file=/export/babel/data/scoring/IndusDB/babel105b-v0.4_conv-dev.kwlist.xml
dev2h_subset_ecf=true
dev2h_nj=18
#Official DEV data files
dev10h_data_dir=/export/babel/data/105-turkish/release-current-b/conversational/dev
dev10h_data_list=/export/babel/data/splits/Turkish_Babel105/dev.list
lexicon_file=/export/babel/data/105-turkish/release-current-b/conversational/reference_materials/lexicon.txt
dev10h_data_cmudb=/export/babel/data/splits/Turkish_Babel105/uem/db-dev+eval-v7-utt.dat
dev10h_stm_file=/export/babel/data/scoring/IndusDB/babel105b-v0.4_conv-dev/babel105b-v0.4_conv-dev.stm
dev10h_ecf_file=/export/babel/data/scoring/IndusDB/babel105b-v0.4_conv-dev.ecf.xml
dev10h_rttm_file=/export/babel/data/scoring/IndusDB/babel105b-v0.4_conv-dev/babel105b-v0.4_conv-dev.mitllfa3.rttm
dev10h_kwlist_file=/export/babel/data/scoring/IndusDB/babel105b-v0.4_conv-dev.kwlist.xml
dev10h_nj=32
#Official EVAL period evaluation data files
eval_data_dir=/export/babel/data/105-turkish/release-current-b/conversational/eval
eval_data_list=/export/babel/data/splits/Turkish_Babel105/eval.babel105b-v0.4.list
eval_data_ecf=/export/babel/data/scoring/IndusDB/babel105b-v0.4_conv-eval.ecf.xml
eval_data_kwlist=/export/babel/data/scoring/IndusDB/babel105b-v0.4_conv-eval.kwlist2.xml
eval_data_cmudb=/export/babel/data/splits/Turkish_Babel105/uem/db-dev+eval-v7-utt.dat
eval_ecf_file=/export/babel/data/scoring/IndusDB/babel105b-v0.4_conv-eval.ecf.xml
eval_kwlist_file=/export/babel/data/scoring/IndusDB/babel105b-v0.4_conv-eval.kwlist2.xml
eval_nj=64
#Official (POST-)EVAL evaluation data portion
evalpart1_data_dir=
evalpart1_data_list=
evalpart1_data_cmudb=
evalpart1_stm_file=
evalpart1_ecf_file=
evalpart1_rttm_file=
evalpart1_kwlist_file=
evalpart1_nj=21
# Acoustic model parameters
numLeavesTri1=1000
numGaussTri1=10000
......@@ -42,21 +67,10 @@ lexiconFlags="--oov <unk>"
# Scoring protocols (dummy GLM file to appease the scoring script)
glmFile=./conf/glm
lexicon_file=/export/babel/data/105-turkish/release-current-b/conversational/reference_materials/lexicon.txt
train_nj=32
decode_nj=18
#keyword search settings
duptime=0.5
case_insensitive=false
ecf_file=/export/babel/data/scoring/IndusDB/babel105b-v0.4_conv-dev.ecf.xml
#Generate a subset of the ecf file according to the {dev,eval}_data_list, if present
subset_ecf=true
kwlist_file=/export/babel/data/scoring/IndusDB/babel105b-v0.4_conv-dev.kwlist.xml
rttm_file=/export/babel/data/scoring/IndusDB/babel105b-v0.4_conv-dev/babel105b-v0.4_conv-dev.mitllfa3.rttm
#Include the links and settings of the BABEL-only software
#This has been moved into path.sh
#. /export/babel/data/software/env.sh
case_insensitive=true
......@@ -4,23 +4,48 @@
#speech corpora files location
train_data_dir=/export/babel/data/105-turkish/release-current-b/conversational/training
train_data_list=/export/babel/data/splits/Turkish_Babel105/train.LimitedLP.official.list
train_nj=16
#RADICAL DEV data files
dev2h_data_dir=/export/babel/data/105-turkish/release-current-b/conversational/dev
dev2h_data_list=/export/