Commit 0bc71728 authored by Dan Povey's avatar Dan Povey
Browse files

sandbox/dan2: merging changes from trunk; some further small code-level...

sandbox/dan2: merging changes from trunk; some further small code-level optimizations to determinization code (which I just realized were done in sandbox/dan2; I'll now merge those back to trunk.

git-svn-id: https://svn.code.sf.net/p/kaldi/code/sandbox/dan2@3087 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
parent 4da87782
# include common settings for limitedLP systems.
. conf/common.limitedLP || exit 1;
#speech corpora files location
train_data_dir=/export/babel/data/106-tagalog/release-current/conversational/training/
train_data_list=/export/babel/data/splits/Tagalog_Babel106/train.LimitedLP.official.list
train_nj=16
#RADICAL DEV data files
dev2h_data_dir=/export/babel/data/106-tagalog/release-current/conversational/dev
dev2h_data_list=/export/babel/data/splits/Tagalog_Babel106/dev2hr.list
dev2h_data_cmudb=/export/babel/data/splits/Tagalog_Babel106/uem/v18/db-tag-utt.dat
dev2h_stm_file=/export/babel/data/scoring/IndusDB/babel106b-v0.2g_conv-dev/babel106b-v0.2g_conv-dev.stm
dev2h_ecf_file=/export/babel/data/scoring/IndusDB/babel106b-v0.2g_conv-dev.ecf.xml
dev2h_rttm_file=/export/babel/data/scoring/IndusDB/babel106b-v0.2g_conv-dev/babel106b-v0.2g_conv-dev.mitllfa3.rttm
dev2h_kwlist_file=/export/babel/data/scoring/IndusDB/babel106b-v0.2g_conv-dev.kwlist.xml
dev2h_subset_ecf=true
dev2h_nj=23
#Official DEV data files
dev10h_data_dir=/export/babel/data/106-tagalog/release-current/conversational/dev
dev10h_data_list=/export/babel/data/splits/Tagalog_Babel106/dev.list
dev10h_data_cmudb=/export/babel/data/splits/Tagalog_Babel106/uem/v18/db-tag-utt.dat
dev10h_stm_file=/export/babel/data/scoring/IndusDB/babel106b-v0.2g_conv-dev/babel106b-v0.2g_conv-dev.stm
dev10h_ecf_file=/export/babel/data/scoring/IndusDB/babel106b-v0.2g_conv-dev.ecf.xml
dev10h_rttm_file=/export/babel/data/scoring/IndusDB/babel106b-v0.2g_conv-dev/babel106b-v0.2g_conv-dev.mitllfa3.rttm
dev10h_kwlist_file=/export/babel/data/scoring/IndusDB/babel106b-v0.2g_conv-dev.kwlist.xml
dev10h_nj=32
#Official EVAL period evaluation data files
eval_data_dir=/export/babel/data/106-tagalog/release-current/conversational/eval
eval_data_list=/export/babel/data/splits/Tagalog_Babel106/eval.babel106b-v0.2g.list
eval_ecf_file=/export/babel/data/scoring/IndusDB/babel106b-v0.2g_conv-eval.ecf.xml
eval_kwlist_file=/export/babel/data/scoring/IndusDB/babel106b-v0.2g_conv-eval.kwlist2.xml
eval_data_cmudb=/export/babel/data/splits/Tagalog_Babel106/uem/v18/db-tag-utt.dat
eval_nj=64
#Official (POST-)EVAL evaluation data portion
evalpart1_data_dir=
evalpart1_data_list=
evalpart1_data_cmudb=
evalpart1_stm_file=
evalpart1_ecf_file=
evalpart1_rttm_file=
evalpart1_kwlist_file=
evalpart1_nj=21
# Acoustic model parameters
numLeavesTri1=1000
numGaussTri1=10000
numLeavesTri2=2500
numGaussTri2=36000
numLeavesTri3=2500
numGaussTri3=36000
numLeavesMLLT=2500
numGaussMLLT=36000
numLeavesSAT=2500
numGaussSAT=36000
numGaussUBM=750
numLeavesSGMM=5000
numGaussSGMM=18000
# Lexicon and Language Model parameters
oovSymbol="<unk>"
lexiconFlags="--oov <unk>"
use_ffv=true
use_pitch=true
# Scoring protocols (dummy GLM file to appease the scoring script)
#glmFile=./conf/glm
lexicon_file=/export/babel/data/106-tagalog/release-current/conversational/reference_materials/lexicon.txt
#keyword search settings
duptime=0.5
case_insensitive=true
# include common settings for fullLP systems.
. conf/common.fullLP || exit 1;
#speech corpora files location
train_data_dir=/export/babel/data/201-haitian/release-current/conversational/training/
train_data_list=/export/babel/data/splits/Haitian_Babel201/train.FullLP.list
train_nj=32
#RADICAL DEV data files
dev2h_data_dir=/export/babel/data/201-haitian/release-current/conversational/dev/
dev2h_data_list=/export/babel/data/splits/Haitian_Babel201/dev.2hr.list
dev2h_data_cmudb=
dev2h_stm_file=
dev2h_ecf_file=
dev2h_rttm_file=
dev2h_kwlist_file=
dev2h_subset_ecf=true
dev2h_nj=20
#Official DEV data files
dev10h_data_dir=/export/babel/data/201-haitian/release-current/conversational/dev
dev10h_data_list=/export/babel/data/splits/Haitian_Babel201/dev.list
dev10h_data_cmudb=
dev10h_stm_file=
dev10h_ecf_file=
dev10h_rttm_file=
dev10h_kwlist_file=
dev10h_nj=32
#RADICAL DEV data files
dev10h_sph_data_dir=/export/babel/data/201-haitian/release-current/conversational/dev/
dev10h_sph_data_list=/export/babel/data/splits/Haitian_Babel201/dev.sph.list
dev10h_sph_data_cmudb=
dev10h_sph_stm_file=
dev10h_sph_ecf_file=
dev10h_sph_rttm_file=
dev10h_sph_kwlist_file=
dev10h_sph_subset_ecf=true
dev10h_sph_nj=32
#RADICAL DEV data files
dev10h_wav_data_dir=/export/babel/data/201-haitian/release-current/conversational/dev/
dev10h_wav_data_list=/export/babel/data/splits/Haitian_Babel201/dev.wav.list
dev10h_wav_data_cmudb=
dev10h_wav_stm_file=
dev10h_wav_ecf_file=
dev10h_wav_rttm_file=
dev10h_wav_kwlist_file=
dev10h_wav_subset_ecf=true
dev10h_wav_nj=13
#Official EVAL period evaluation data files
eval_data_dir=/export/babel/data/201-haitian/release-current/conversational/eval
eval_data_list=
eval_ecf_file=
eval_kwlist_file=
eval_data_cmudb=
eval_nj=64
#Official (POST-)EVAL evaluation data portion
evalpart1_data_dir=
evalpart1_data_list=
evalpart1_data_cmudb=
evalpart1_stm_file=
evalpart1_ecf_file=
evalpart1_rttm_file=
evalpart1_kwlist_file=
evalpart1_nj=21
# Acoustic model parameters
numLeavesTri1=1000
numGaussTri1=10000
numLeavesTri2=1000
numGaussTri2=20000
numLeavesTri3=6000
numGaussTri3=75000
numLeavesMLLT=6000
numGaussMLLT=75000
numLeavesSAT=6000
numGaussSAT=75000
numGaussUBM=800
numLeavesSGMM=10000
numGaussSGMM=80000
# Lexicon and Language Model parameters
oovSymbol="<unk>"
lexiconFlags="--oov <unk>"
use_pitch=false
use_ffv=false
# Scoring protocols (dummy GLM file to appease the scoring script)
#glmFile=./conf/glm
lexicon_file=/export/babel/data/201-haitian/release-current/conversational/reference_materials/lexicon.txt
#keyword search settings
duptime=0.5
case_insensitive=true
# include common settings for fullLP systems.
. conf/common.fullLP || exit 1;
#speech corpora files location
train_data_dir=/export/babel/data/201-haitian/release-current/conversational/training/
train_data_list=/export/babel/data/splits/Haitian_Babel201/train.FullLP.list
train_nj=32
#RADICAL DEV data files
dev2h_data_dir=/export/babel/data/201-haitian/release-current/conversational/dev/
dev2h_data_list=/export/babel/data/splits/Haitian_Babel201/dev.2hr.list
dev2h_data_cmudb=
dev2h_stm_file=
dev2h_ecf_file=
dev2h_rttm_file=
dev2h_kwlist_file=
dev2h_subset_ecf=true
dev2h_nj=20
#Official DEV data files
dev10h_data_dir=/export/babel/data/201-haitian/release-current/conversational/dev
dev10h_data_list=/export/babel/data/splits/Haitian_Babel201/dev.list
dev10h_data_cmudb=
dev10h_stm_file=
dev10h_ecf_file=
dev10h_rttm_file=
dev10h_kwlist_file=
dev10h_nj=32
#RADICAL DEV data files
dev10h_sph_data_dir=/export/babel/data/201-haitian/release-current/conversational/dev/
dev10h_sph_data_list=/export/babel/data/splits/Haitian_Babel201/dev.sph.list
dev10h_sph_data_cmudb=
dev10h_sph_stm_file=
dev10h_sph_ecf_file=
dev10h_sph_rttm_file=
dev10h_sph_kwlist_file=
dev10h_sph_subset_ecf=true
dev10h_sph_nj=32
#RADICAL DEV data files
dev10h_wav_data_dir=/export/babel/data/201-haitian/release-current/conversational/dev/
dev10h_wav_data_list=/export/babel/data/splits/Haitian_Babel201/dev.wav.list
dev10h_wav_data_cmudb=
dev10h_wav_stm_file=
dev10h_wav_ecf_file=
dev10h_wav_rttm_file=
dev10h_wav_kwlist_file=
dev10h_wav_subset_ecf=true
dev10h_wav_nj=13
#Official EVAL period evaluation data files
eval_data_dir=/export/babel/data/201-haitian/release-current/conversational/eval
eval_data_list=
eval_ecf_file=
eval_kwlist_file=
eval_data_cmudb=
eval_nj=64
#Official (POST-)EVAL evaluation data portion
evalpart1_data_dir=
evalpart1_data_list=
evalpart1_data_cmudb=
evalpart1_stm_file=
evalpart1_ecf_file=
evalpart1_rttm_file=
evalpart1_kwlist_file=
evalpart1_nj=21
# Acoustic model parameters
numLeavesTri1=1000
numGaussTri1=10000
numLeavesTri2=1000
numGaussTri2=20000
numLeavesTri3=6000
numGaussTri3=75000
numLeavesMLLT=6000
numGaussMLLT=75000
numLeavesSAT=6000
numGaussSAT=75000
numGaussUBM=800
numLeavesSGMM=10000
numGaussSGMM=80000
# Lexicon and Language Model parameters
oovSymbol="<unk>"
lexiconFlags="--oov <unk>"
use_pitch=true
use_ffv=true
# Scoring protocols (dummy GLM file to appease the scoring script)
#glmFile=./conf/glm
lexicon_file=/export/babel/data/201-haitian/release-current/conversational/reference_materials/lexicon.txt
#keyword search settings
duptime=0.5
case_insensitive=true
# include common settings for limitedLP systems.
. conf/common.limitedLP || exit 1;
#speech corpora files location
train_data_dir=/export/babel/data/201-haitian/release-current/conversational/training/
train_data_list=/export/babel/data/splits/Haitian_Babel201/train.LimitedLP.list
train_nj=16
#RADICAL DEV data files
dev2h_data_dir=/export/babel/data/201-haitian/release-current/conversational/dev/
dev2h_data_list=/export/babel/data/splits/Haitian_Babel201/dev.2hr.list
dev2h_data_cmudb=
dev2h_stm_file=
dev2h_ecf_file=
dev2h_rttm_file=
dev2h_kwlist_file=
dev2h_subset_ecf=true
dev2h_nj=20
#Official DEV data files
dev10h_data_dir=/export/babel/data/201-haitian/release-current/conversational/dev
dev10h_data_list=/export/babel/data/splits/Haitian_Babel201//dev.list
dev10h_data_cmudb=
dev10h_stm_file=
dev10h_ecf_file=
dev10h_rttm_file=
dev10h_kwlist_file=
dev10h_nj=32
#Official EVAL period evaluation data files
eval_data_dir=/export/babel/data/201-haitian//release-current/conversational/eval
eval_data_list=
eval_ecf_file=
eval_kwlist_file=
eval_data_cmudb=
eval_nj=64
#Official (POST-)EVAL evaluation data portion
evalpart1_data_dir=
evalpart1_data_list=
evalpart1_data_cmudb=
evalpart1_stm_file=
evalpart1_ecf_file=
evalpart1_rttm_file=
evalpart1_kwlist_file=
evalpart1_nj=21
# Acoustic model parameters
numLeavesTri1=1000
numGaussTri1=10000
numLeavesTri2=2500
numGaussTri2=36000
numLeavesTri3=2500
numGaussTri3=36000
numLeavesMLLT=2500
numGaussMLLT=36000
numLeavesSAT=2500
numGaussSAT=36000
numGaussUBM=750
numLeavesSGMM=5000
numGaussSGMM=18000
# Lexicon and Language Model parameters
oovSymbol="<unk>"
lexiconFlags="--oov <unk>"
use_pitch=true
use_ffv=true
# Scoring protocols (dummy GLM file to appease the scoring script)
#glmFile=./conf/glm
lexicon_file=/export/babel/data/201-haitian/release-current/conversational/reference_materials/lexicon.sub-train.txt
#keyword search settings
duptime=0.5
case_insensitive=true
......@@ -85,8 +85,8 @@ numGaussSGMM=80000
# Lexicon and Language Model parameters
oovSymbol="<unk>"
lexiconFlags="--oov <unk>"
use_pitch=true
use_ffv=true
use_pitch=false
use_ffv=false
# Scoring protocols (dummy GLM file to appease the scoring script)
#glmFile=./conf/glm
lexicon_file=/export/babel/data/206-zulu/release-current/conversational/reference_materials/lexicon.txt
......
# include common settings for fullLP systems.
. conf/common.fullLP || exit 1;
#speech corpora files location
train_data_dir=/export/babel/data/206-zulu/release-current/conversational/training/
train_data_list=/export/babel/data/splits/Zulu_Babel206/train.FullLP.list
train_nj=32
#RADICAL DEV data files
dev2h_data_dir=/export/babel/data/206-zulu/release-current/conversational/dev/
dev2h_data_list=/export/babel/data/splits/Zulu_Babel206/dev.2hr.list
dev2h_data_cmudb=
dev2h_stm_file=
dev2h_ecf_file=
dev2h_rttm_file=
dev2h_kwlist_file=
dev2h_subset_ecf=true
dev2h_nj=20
#Official DEV data files
dev10h_data_dir=/export/babel/data/206-zulu/release-current/conversational/dev
dev10h_data_list=/export/babel/data/splits/Zulu_Babel206/dev.list
dev10h_data_cmudb=
dev10h_stm_file=
dev10h_ecf_file=
dev10h_rttm_file=
dev10h_kwlist_file=
dev10h_nj=32
#RADICAL DEV data files
dev10h_sph_data_dir=/export/babel/data/206-zulu/release-current/conversational/dev/
dev10h_sph_data_list=/export/babel/data/splits/Zulu_Babel206/dev.sph.list
dev10h_sph_data_cmudb=
dev10h_sph_stm_file=
dev10h_sph_ecf_file=
dev10h_sph_rttm_file=
dev10h_sph_kwlist_file=
dev10h_sph_subset_ecf=true
dev10h_sph_nj=32
#RADICAL DEV data files
dev10h_wav_data_dir=/export/babel/data/206-zulu/release-current/conversational/dev/
dev10h_wav_data_list=/export/babel/data/splits/Zulu_Babel206/dev.wav.list
dev10h_wav_data_cmudb=
dev10h_wav_stm_file=
dev10h_wav_ecf_file=
dev10h_wav_rttm_file=
dev10h_wav_kwlist_file=
dev10h_wav_subset_ecf=true
dev10h_wav_nj=13
#Official EVAL period evaluation data files
eval_data_dir=/export/babel/data/206-zulu/release-current/conversational/eval
eval_data_list=
eval_ecf_file=
eval_kwlist_file=
eval_data_cmudb=
eval_nj=64
#Official (POST-)EVAL evaluation data portion
evalpart1_data_dir=
evalpart1_data_list=
evalpart1_data_cmudb=
evalpart1_stm_file=
evalpart1_ecf_file=
evalpart1_rttm_file=
evalpart1_kwlist_file=
evalpart1_nj=21
# Acoustic model parameters
numLeavesTri1=1000
numGaussTri1=10000
numLeavesTri2=1000
numGaussTri2=20000
numLeavesTri3=6000
numGaussTri3=75000
numLeavesMLLT=6000
numGaussMLLT=75000
numLeavesSAT=6000
numGaussSAT=75000
numGaussUBM=800
numLeavesSGMM=10000
numGaussSGMM=80000
# Lexicon and Language Model parameters
oovSymbol="<unk>"
lexiconFlags="--oov <unk>"
use_pitch=true
use_ffv=true
# Scoring protocols (dummy GLM file to appease the scoring script)
#glmFile=./conf/glm
lexicon_file=/export/babel/data/206-zulu/release-current/conversational/reference_materials/lexicon.txt
#keyword search settings
duptime=0.5
case_insensitive=true
......@@ -37,6 +37,28 @@ devtrain_rttm_file=
devtrain_kwlist_file=
devtrain_nj=64
#RADICAL DEV data files
dev10h_sph_data_dir=/export/babel/data/206-zulu/release-current/conversational/dev/
dev10h_sph_data_list=/export/babel/data/splits/Zulu_Babel206/dev.sph.list
dev10h_sph_data_cmudb=
dev10h_sph_stm_file=
dev10h_sph_ecf_file=
dev10h_sph_rttm_file=
dev10h_sph_kwlist_file=
dev10h_sph_subset_ecf=true
dev10h_sph_nj=32
#RADICAL DEV data files
dev10h_wav_data_dir=/export/babel/data/206-zulu/release-current/conversational/dev/
dev10h_wav_data_list=/export/babel/data/splits/Zulu_Babel206/dev.wav.list
dev10h_wav_data_cmudb=
dev10h_wav_stm_file=
dev10h_wav_ecf_file=
dev10h_wav_rttm_file=
dev10h_wav_kwlist_file=
dev10h_wav_subset_ecf=true
dev10h_wav_nj=13
#Official EVAL period evaluation data files
eval_data_dir=/export/babel/data/206-zulu/release-current/conversational/eval
eval_data_list=
......@@ -74,8 +96,8 @@ numGaussSGMM=18000
oovSymbol="<unk>"
lexiconFlags="--oov <unk>"
use_pitch=true
use_ffv=true
use_pitch=false
use_ffv=false
# Scoring protocols (dummy GLM file to appease the scoring script)
#glmFile=./conf/glm
lexicon_file=/export/babel/data/206-zulu/release-current/conversational/reference_materials/lexicon.sub-train.txt
......
# include common settings for limitedLP systems.
. conf/common.limitedLP || exit 1;
#speech corpora files location
train_data_dir=/export/babel/data/206-zulu/release-current/conversational/training/
train_data_list=/export/babel/data/splits/Zulu_Babel206/train.LimitedLP.list
train_nj=16
#RADICAL DEV data files
dev2h_data_dir=/export/babel/data/206-zulu/release-current/conversational/dev/
dev2h_data_list=/export/babel/data/splits/Zulu_Babel206/dev.2hr.list
dev2h_data_cmudb=
dev2h_stm_file=
dev2h_ecf_file=
dev2h_rttm_file=
dev2h_kwlist_file=
dev2h_subset_ecf=true
dev2h_nj=20
#Official DEV data files
dev10h_data_dir=/export/babel/data/206-zulu/release-current/conversational/dev
dev10h_data_list=/export/babel/data/splits/Zulu_Babel206/dev.list
dev10h_data_cmudb=
dev10h_stm_file=
dev10h_ecf_file=
dev10h_rttm_file=
dev10h_kwlist_file=
dev10h_nj=32
#RADICAL EVAL data files (difference between TRAIN-FULL TRAIN-LIMITED)
devtrain_data_dir=/export/babel/data/206-zulu/release-current/conversational/training/
devtrain_data_list=/export/babel/data/splits/Zulu_Babel206/dev.wav.list
devtrain_data_cmudb=
devtrain_stm_file=
devtrain_ecf_file=
devtrain_rttm_file=
devtrain_kwlist_file=
devtrain_nj=64
#RADICAL DEV data files
dev10h_sph_data_dir=/export/babel/data/206-zulu/release-current/conversational/dev/
dev10h_sph_data_list=/export/babel/data/splits/Zulu_Babel206/dev.sph.list
dev10h_sph_data_cmudb=
dev10h_sph_stm_file=
dev10h_sph_ecf_file=
dev10h_sph_rttm_file=
dev10h_sph_kwlist_file=
dev10h_sph_subset_ecf=true
dev10h_sph_nj=32
#RADICAL DEV data files
dev10h_wav_data_dir=/export/babel/data/206-zulu/release-current/conversational/dev/
dev10h_wav_data_list=/export/babel/data/splits/Zulu_Babel206/dev.wav.list
dev10h_wav_data_cmudb=
dev10h_wav_stm_file=
dev10h_wav_ecf_file=
dev10h_wav_rttm_file=
dev10h_wav_kwlist_file=
dev10h_wav_subset_ecf=true
dev10h_wav_nj=13
#Official EVAL period evaluation data files
eval_data_dir=/export/babel/data/206-zulu/release-current/conversational/eval
eval_data_list=
eval_ecf_file=
eval_kwlist_file=
eval_data_cmudb=
eval_nj=64
#Official (POST-)EVAL evaluation data portion
evalpart1_data_dir=
evalpart1_data_list=
evalpart1_data_cmudb=
evalpart1_stm_file=
evalpart1_ecf_file=
evalpart1_rttm_file=
evalpart1_kwlist_file=
evalpart1_nj=21
# Acoustic model parameters
numLeavesTri1=1000
numGaussTri1=10000
numLeavesTri2=2500
numGaussTri2=36000
numLeavesTri3=2500
numGaussTri3=36000
numLeavesMLLT=2500
numGaussMLLT=36000
numLeavesSAT=2500
numGaussSAT=36000
numGaussUBM=750
numLeavesSGMM=5000
numGaussSGMM=18000