Commit 3e67d2f3 authored by Dan Povey


sandbox/dan2: merging trunk changes, a couple of bug fixes for the new neural-net training setup (re: grabbing GPUs), and adding a new example script in the RM setup

git-svn-id: https://svn.code.sf.net/p/kaldi/code/sandbox/dan2@3132 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
parent d0b69698
......@@ -11,9 +11,9 @@ dev2h_data_dir=/export/babel/data/201-haitian/release-current/conversational/dev
dev2h_data_list=/export/babel/data/splits/Haitian_Babel201/dev.2hr.list
dev2h_data_cmudb=
dev2h_stm_file=
dev2h_ecf_file=
dev2h_rttm_file=
dev2h_kwlist_file=
dev2h_ecf_file=/export/babel/data/splits/Haitian_Babel201/babel201-v1.0_conv-jhu10hdev.ecf.xml
dev2h_rttm_file=/export/babel/data/splits/Haitian_Babel201/babel201-v1.0_conv-jhu10hdev.rttm
dev2h_kwlist_file=/export/babel/data/splits/Haitian_Babel201/babel201-v1.0_conv-jhu10hdev.kwlist.xml
dev2h_subset_ecf=true
dev2h_nj=20
......@@ -22,9 +22,9 @@ dev10h_data_dir=/export/babel/data/201-haitian/release-current/conversational/de
dev10h_data_list=/export/babel/data/splits/Haitian_Babel201/dev.list
dev10h_data_cmudb=
dev10h_stm_file=
dev10h_ecf_file=
dev10h_rttm_file=
dev10h_kwlist_file=
dev10h_ecf_file=/export/babel/data/splits/Haitian_Babel201/babel201-v1.0_conv-jhu10hdev.ecf.xml
dev10h_rttm_file=/export/babel/data/splits/Haitian_Babel201/babel201-v1.0_conv-jhu10hdev.rttm
dev10h_kwlist_file=/export/babel/data/splits/Haitian_Babel201/babel201-v1.0_conv-jhu10hdev.kwlist.xml
dev10h_nj=32
#RADICAL DEV data files
......
......@@ -11,9 +11,9 @@ dev2h_data_dir=/export/babel/data/201-haitian/release-current/conversational/dev
dev2h_data_list=/export/babel/data/splits/Haitian_Babel201/dev.2hr.list
dev2h_data_cmudb=
dev2h_stm_file=
dev2h_ecf_file=
dev2h_rttm_file=
dev2h_kwlist_file=
dev2h_ecf_file=/export/babel/data/splits/Haitian_Babel201/babel201-v1.0_conv-jhu10hdev.ecf.xml
dev2h_rttm_file=/export/babel/data/splits/Haitian_Babel201/babel201-v1.0_conv-jhu10hdev.rttm
dev2h_kwlist_file=/export/babel/data/splits/Haitian_Babel201/babel201-v1.0_conv-jhu10hdev.kwlist.xml
dev2h_subset_ecf=true
dev2h_nj=20
......@@ -22,9 +22,9 @@ dev10h_data_dir=/export/babel/data/201-haitian/release-current/conversational/de
dev10h_data_list=/export/babel/data/splits/Haitian_Babel201/dev.list
dev10h_data_cmudb=
dev10h_stm_file=
dev10h_ecf_file=
dev10h_rttm_file=
dev10h_kwlist_file=
dev10h_ecf_file=/export/babel/data/splits/Haitian_Babel201/babel201-v1.0_conv-jhu10hdev.ecf.xml
dev10h_rttm_file=/export/babel/data/splits/Haitian_Babel201/babel201-v1.0_conv-jhu10hdev.rttm
dev10h_kwlist_file=/export/babel/data/splits/Haitian_Babel201/babel201-v1.0_conv-jhu10hdev.kwlist.xml
dev10h_nj=32
#RADICAL DEV data files
......
......@@ -11,20 +11,20 @@ dev2h_data_dir=/export/babel/data/201-haitian/release-current/conversational/dev
dev2h_data_list=/export/babel/data/splits/Haitian_Babel201/dev.2hr.list
dev2h_data_cmudb=
dev2h_stm_file=
dev2h_ecf_file=
dev2h_rttm_file=
dev2h_kwlist_file=
dev2h_ecf_file=/export/babel/data/splits/Haitian_Babel201/babel201-v1.0_conv-jhu10hdev.ecf.xml
dev2h_rttm_file=/export/babel/data/splits/Haitian_Babel201/babel201-v1.0_conv-jhu10hdev.rttm
dev2h_kwlist_file=/export/babel/data/splits/Haitian_Babel201/babel201-v1.0_conv-jhu10hdev.kwlist.xml
dev2h_subset_ecf=true
dev2h_nj=20
#Official DEV data files
dev10h_data_dir=/export/babel/data/201-haitian/release-current/conversational/dev
dev10h_data_list=/export/babel/data/splits/Haitian_Babel201//dev.list
dev10h_data_list=/export/babel/data/splits/Haitian_Babel201/dev.list
dev10h_data_cmudb=
dev10h_stm_file=
dev10h_ecf_file=
dev10h_rttm_file=
dev10h_kwlist_file=
dev10h_ecf_file=/export/babel/data/splits/Haitian_Babel201/babel201-v1.0_conv-jhu10hdev.ecf.xml
dev10h_rttm_file=/export/babel/data/splits/Haitian_Babel201/babel201-v1.0_conv-jhu10hdev.rttm
dev10h_kwlist_file=/export/babel/data/splits/Haitian_Babel201/babel201-v1.0_conv-jhu10hdev.kwlist.xml
dev10h_nj=32
#Official EVAL period evaluation data files
......
......@@ -11,20 +11,20 @@ dev2h_data_dir=/export/babel/data/206-zulu/release-current/conversational/dev/
dev2h_data_list=/export/babel/data/splits/Zulu_Babel206/dev.2hr.list
dev2h_data_cmudb=
dev2h_stm_file=
dev2h_ecf_file=
dev2h_rttm_file=
dev2h_kwlist_file=
dev2h_ecf_file=/export/babel/data/splits/Zulu_Babel206/babel206-v1.0_conv-jhu10hdev.ecf.xml
dev2h_rttm_file=/export/babel/data/splits/Zulu_Babel206/babel206-v1.0_conv-jhu10hdev.rttm
dev2h_kwlist_file=/export/babel/data/splits/Zulu_Babel206/babel206-v1.0_conv-jhu10hdev.kwlist.xml
dev2h_subset_ecf=true
dev2h_nj=20
dev2h_nj=18
#Official DEV data files
dev10h_data_dir=/export/babel/data/206-zulu/release-current/conversational/dev
dev10h_data_list=/export/babel/data/splits/Zulu_Babel206/dev.list
dev10h_data_cmudb=
dev10h_stm_file=
dev10h_ecf_file=
dev10h_rttm_file=
dev10h_kwlist_file=
dev10h_ecf_file=/export/babel/data/splits/Zulu_Babel206/babel206-v1.0_conv-jhu10hdev.ecf.xml
dev10h_rttm_file=/export/babel/data/splits/Zulu_Babel206/babel206-v1.0_conv-jhu10hdev.rttm
dev10h_kwlist_file=/export/babel/data/splits/Zulu_Babel206/babel206-v1.0_conv-jhu10hdev.kwlist.xml
dev10h_nj=32
#RADICAL DEV data files
......@@ -85,6 +85,7 @@ numGaussSGMM=80000
# Lexicon and Language Model parameters
oovSymbol="<unk>"
lexiconFlags="--oov <unk>"
use_pitch=false
use_ffv=false
# Scoring protocols (dummy GLM file to appease the scoring script)
......
......@@ -11,20 +11,20 @@ dev2h_data_dir=/export/babel/data/206-zulu/release-current/conversational/dev/
dev2h_data_list=/export/babel/data/splits/Zulu_Babel206/dev.2hr.list
dev2h_data_cmudb=
dev2h_stm_file=
dev2h_ecf_file=
dev2h_rttm_file=
dev2h_kwlist_file=
dev2h_ecf_file=/export/babel/data/splits/Zulu_Babel206/babel206-v1.0_conv-jhu10hdev.ecf.xml
dev2h_rttm_file=/export/babel/data/splits/Zulu_Babel206/babel206-v1.0_conv-jhu10hdev.rttm
dev2h_kwlist_file=/export/babel/data/splits/Zulu_Babel206/babel206-v1.0_conv-jhu10hdev.kwlist.xml
dev2h_subset_ecf=true
dev2h_nj=20
dev2h_nj=18
#Official DEV data files
dev10h_data_dir=/export/babel/data/206-zulu/release-current/conversational/dev
dev10h_data_list=/export/babel/data/splits/Zulu_Babel206/dev.list
dev10h_data_cmudb=
dev10h_stm_file=
dev10h_ecf_file=
dev10h_rttm_file=
dev10h_kwlist_file=
dev10h_ecf_file=/export/babel/data/splits/Zulu_Babel206/babel206-v1.0_conv-jhu10hdev.ecf.xml
dev10h_rttm_file=/export/babel/data/splits/Zulu_Babel206/babel206-v1.0_conv-jhu10hdev.rttm
dev10h_kwlist_file=/export/babel/data/splits/Zulu_Babel206/babel206-v1.0_conv-jhu10hdev.kwlist.xml
dev10h_nj=32
#RADICAL DEV data files
......@@ -85,6 +85,7 @@ numGaussSGMM=80000
# Lexicon and Language Model parameters
oovSymbol="<unk>"
lexiconFlags="--oov <unk>"
use_pitch=true
use_ffv=true
# Scoring protocols (dummy GLM file to appease the scoring script)
......
......@@ -11,25 +11,25 @@ dev2h_data_dir=/export/babel/data/206-zulu/release-current/conversational/dev/
dev2h_data_list=/export/babel/data/splits/Zulu_Babel206/dev.2hr.list
dev2h_data_cmudb=
dev2h_stm_file=
dev2h_ecf_file=
dev2h_rttm_file=
dev2h_kwlist_file=
dev2h_ecf_file=/export/babel/data/splits/Zulu_Babel206/babel206-v1.0_conv-jhu10hdev.ecf.xml
dev2h_rttm_file=/export/babel/data/splits/Zulu_Babel206/babel206-v1.0_conv-jhu10hdev.rttm
dev2h_kwlist_file=/export/babel/data/splits/Zulu_Babel206/babel206-v1.0_conv-jhu10hdev.kwlist.xml
dev2h_subset_ecf=true
dev2h_nj=20
dev2h_nj=18
#Official DEV data files
dev10h_data_dir=/export/babel/data/206-zulu/release-current/conversational/dev
dev10h_data_list=/export/babel/data/splits/Zulu_Babel206/dev.list
dev10h_data_cmudb=
dev10h_stm_file=
dev10h_ecf_file=
dev10h_rttm_file=
dev10h_kwlist_file=
dev10h_ecf_file=/export/babel/data/splits/Zulu_Babel206/babel206-v1.0_conv-jhu10hdev.ecf.xml
dev10h_rttm_file=/export/babel/data/splits/Zulu_Babel206/babel206-v1.0_conv-jhu10hdev.rttm
dev10h_kwlist_file=/export/babel/data/splits/Zulu_Babel206/babel206-v1.0_conv-jhu10hdev.kwlist.xml
dev10h_nj=32
#RADICAL EVAL data files (difference between TRAIN-FULL TRAIN-LIMITED)
devtrain_data_dir=/export/babel/data/206-zulu/release-current/conversational/training/
devtrain_data_list=/export/babel/data/splits/Zulu_Babel206/dev.wav.list
devtrain_data_list=/export/babel/data/splits/Zulu_Babel206/dev.train.list
devtrain_data_cmudb=
devtrain_stm_file=
devtrain_ecf_file=
......
......@@ -11,25 +11,25 @@ dev2h_data_dir=/export/babel/data/206-zulu/release-current/conversational/dev/
dev2h_data_list=/export/babel/data/splits/Zulu_Babel206/dev.2hr.list
dev2h_data_cmudb=
dev2h_stm_file=
dev2h_ecf_file=
dev2h_rttm_file=
dev2h_kwlist_file=
dev2h_ecf_file=/export/babel/data/splits/Zulu_Babel206/babel206-v1.0_conv-jhu10hdev.ecf.xml
dev2h_rttm_file=/export/babel/data/splits/Zulu_Babel206/babel206-v1.0_conv-jhu10hdev.rttm
dev2h_kwlist_file=/export/babel/data/splits/Zulu_Babel206/babel206-v1.0_conv-jhu10hdev.kwlist.xml
dev2h_subset_ecf=true
dev2h_nj=20
dev2h_nj=18
#Official DEV data files
dev10h_data_dir=/export/babel/data/206-zulu/release-current/conversational/dev
dev10h_data_list=/export/babel/data/splits/Zulu_Babel206/dev.list
dev10h_data_cmudb=
dev10h_stm_file=
dev10h_ecf_file=
dev10h_rttm_file=
dev10h_kwlist_file=
dev10h_ecf_file=/export/babel/data/splits/Zulu_Babel206/babel206-v1.0_conv-jhu10hdev.ecf.xml
dev10h_rttm_file=/export/babel/data/splits/Zulu_Babel206/babel206-v1.0_conv-jhu10hdev.rttm
dev10h_kwlist_file=/export/babel/data/splits/Zulu_Babel206/babel206-v1.0_conv-jhu10hdev.kwlist.xml
dev10h_nj=32
#RADICAL EVAL data files (difference between TRAIN-FULL TRAIN-LIMITED)
devtrain_data_dir=/export/babel/data/206-zulu/release-current/conversational/training/
devtrain_data_list=/export/babel/data/splits/Zulu_Babel206/dev.wav.list
devtrain_data_list=/export/babel/data/splits/Zulu_Babel206/dev.train.list
devtrain_data_cmudb=
devtrain_stm_file=
devtrain_ecf_file=
......
......@@ -11,9 +11,9 @@ dev2h_data_dir=/export/babel/data/206-zulu/release-current/conversational/dev/
dev2h_data_list=/export/babel/data/splits/Zulu_Babel206/dev.2hr.list
dev2h_data_cmudb=
dev2h_stm_file=
dev2h_ecf_file=
dev2h_rttm_file=
dev2h_kwlist_file=
dev2h_ecf_file=/export/babel/data/splits/Zulu_Babel206/babel206-v1.0_conv-jhu10hdev.ecf.xml
dev2h_rttm_file=/export/babel/data/splits/Zulu_Babel206/babel206-v1.0_conv-jhu10hdev.rttm
dev2h_kwlist_file=/export/babel/data/splits/Zulu_Babel206/babel206-v1.0_conv-jhu10hdev.kwlist.xml
dev2h_subset_ecf=true
dev2h_nj=18
......@@ -22,9 +22,9 @@ dev10h_data_dir=/export/babel/data/206-zulu/release-current/conversational/dev
dev10h_data_list=/export/babel/data/splits/Zulu_Babel206/dev.list
dev10h_data_cmudb=
dev10h_stm_file=
dev10h_ecf_file=
dev10h_rttm_file=
dev10h_kwlist_file=
dev10h_ecf_file=/export/babel/data/splits/Zulu_Babel206/babel206-v1.0_conv-jhu10hdev.ecf.xml
dev10h_rttm_file=/export/babel/data/splits/Zulu_Babel206/babel206-v1.0_conv-jhu10hdev.rttm
dev10h_kwlist_file=/export/babel/data/splits/Zulu_Babel206/babel206-v1.0_conv-jhu10hdev.kwlist.xml
dev10h_nj=32
#RADICAL EVAL data files (difference between TRAIN-FULL TRAIN-LIMITED)
......
......@@ -18,9 +18,9 @@ sub KeywordSort {
my $Usage = <<EOU;
This script reads a alignment.csv file and computes the ATWV, OTWV, MTWV by
sweeping the threshold. The duration of the search collection is supposed to be
provided. In the Babel case, the duration should be half of the total audio
duration.
sweeping the threshold. It also computes the lattice recall. The duration of
the search collection is supposed to be provided. In the Babel case, the
duration should be half of the total audio duration.
The alignment.csv file is supposed to have the following fields for each line:
language,file,channel,termid,term,ref_bt,ref_et,sys_bt,sys_et,sys_score,
......@@ -56,6 +56,9 @@ open(A, "<$alignment_in") || die "$0: Fail to open alignment file: $alignment_in
my %Ntrue;
my %keywords;
my %alignment;
my $true_miss = 0;
my $soft_miss = 0;
my $true_hit = 0;
while (<A>) {
chomp;
my @col = split(',');
......@@ -94,6 +97,15 @@ while (<A>) {
}
$Ntrue{$col[3]} += 1;
$keywords{$col[3]} = 1;
# The following is for lattice recall.
if ($col[11] eq "CORR" && $col[10] eq "YES") {
$true_hit ++;
} elsif ($col[11] eq "MISS" && $col[10] eq "NO") {
$soft_miss ++;
} elsif ($col[11] eq "MISS" && $col[10] eq "") {
$true_miss ++;
}
next;
}
}
......@@ -159,6 +171,9 @@ $otwv /= scalar(keys %keywords);
$otwv = sprintf("%.4f", $otwv);
$mtwv /= scalar(keys %keywords);
$mtwv = sprintf("%.4f", $mtwv);
my $lattice_recall = 1 - $true_miss / ($true_miss + $soft_miss + $true_hit);
$lattice_recall = sprintf("%.4f", $lattice_recall);
print "ATWV = $atwv\n";
print "OTWV = $otwv\n";
print "MTWV = $mtwv, THRESHOLD = $mtwv_threshold\n";
print "Lattice Recall = $lattice_recall\n";
......@@ -46,12 +46,18 @@ nnet_tanh_6l/decode_eval/cer_10:%CER 21.34 [ 1614 / 7562, 369 ins, 487 del, 758
nnet_4m_3l/decode_eval/cer_10:%CER 22.38 [ 1692 / 7562, 372 ins, 510 del, 810 sub ] # 3 hidden layers neural network
nnet_tanh_3l/decode_eval/cer_10:%CER 22.11 [ 1672 / 7562, 391 ins, 489 del, 792 sub ] # 3 hidden layers neural network (nnet2 script, 1024 neurons)
tri5a_pretrain-dbn_dnn/decode/cer_10:%CER 20.48 [ 1549 / 7562, 383 ins, 468 del, 698 sub ] # 6 layers DNN - pretrained RBM, cross entropy trained DNN
tri5a_pretrain-dbn_dnn_smbr/decode_it1/cer_10:%CER 18.73 [ 1416 / 7562, 306 ins, 453 del, 657 sub ] # sMBR trained DNN
tri5a_pretrain-dbn_dnn/decode/cer_10:%CER 20.48 [ 1549 / 7562, 383 ins, 468 del, 698 sub ] # 6 layers DNN - pretrained RBM, cross entropy trained DNN
tri5a_pretrain-dbn_dnn_smbr/decode_it1/cer_10:%CER 18.73 [ 1416 / 7562, 306 ins, 453 del, 657 sub ] # sMBR trained DNN (1024 neurons)
tri5a_pretrain-dbn_dnn_smbr/decode_it2/cer_10:%CER 18.73 [ 1416 / 7562, 310 ins, 446 del, 660 sub ]
tri5a_pretrain-dbn_dnn_smbr/decode_it3/cer_10:%CER 18.62 [ 1408 / 7562, 313 ins, 446 del, 649 sub ]
tri5a_pretrain-dbn_dnn_smbr/decode_it4/cer_10:%CER 18.66 [ 1411 / 7562, 307 ins, 458 del, 646 sub ]
tri5a_pretrain-dbn_dnn2/decode/cer_10:%CER 20.56 [ 1555 / 7562, 388 ins, 463 del, 704 sub ] # (2048 neurons) <= doesn't outperform 1024 neurons system
tri5a_pretrain-dbn_dnn_smbr2/decode_it1/cer_10:%CER 19.06 [ 1441 / 7562, 319 ins, 472 del, 650 sub ] # sMBR trained DNN <= converge quickly
tri5a_pretrain-dbn_dnn_smbr2/decode_it2/cer_10:%CER 19.08 [ 1443 / 7562, 315 ins, 470 del, 658 sub ]
tri5a_pretrain-dbn_dnn_smbr2/decode_it3/cer_10:%CER 19.00 [ 1437 / 7562, 315 ins, 462 del, 660 sub ]
tri5a_pretrain-dbn_dnn_smbr2/decode_it4/cer_10:%CER 18.96 [ 1434 / 7562, 314 ins, 470 del, 650 sub ]
tri5a_pretrain-dbn_dnn_smbr2/decode_it5/cer_10:%CER 18.95 [ 1433 / 7562, 317 ins, 460 del, 656 sub ]
### 16K wordlist close LM, the perplexity of the LM was optimized with the sentences of evaluation data
tri1/decode_eval_closelm/cer_10:%CER 46.69 [ 3531 / 7562, 1205 ins, 407 del, 1919 sub ]
......@@ -106,6 +112,12 @@ tri5a_pretrain-dbn_dnn_smbr/decode_closelm_it2/cer_10:%CER 15.30 [ 1157 / 7562,
tri5a_pretrain-dbn_dnn_smbr/decode_closelm_it3/cer_10:%CER 15.52 [ 1174 / 7562, 280 ins, 408 del, 486 sub ]
tri5a_pretrain-dbn_dnn_smbr/decode_closelm_it4/cer_10:%CER 15.62 [ 1181 / 7562, 278 ins, 412 del, 491 sub ]
tri5a_pretrain-dbn_dnn2/decode_closelm_xeon3.5/cer_10:%CER 17.06 [ 1290 / 7562, 347 ins, 433 del, 510 sub ]
tri5a_pretrain-dbn_dnn_smbr2/decode_closelm_it1/cer_10:%CER 15.87 [ 1200 / 7562, 292 ins, 436 del, 472 sub ]
tri5a_pretrain-dbn_dnn_smbr2/decode_closelm_it2/cer_10:%CER 15.71 [ 1188 / 7562, 285 ins, 433 del, 470 sub ]
tri5a_pretrain-dbn_dnn_smbr2/decode_closelm_it3/cer_10:%CER 15.76 [ 1192 / 7562, 286 ins, 430 del, 476 sub ]
tri5a_pretrain-dbn_dnn_smbr2/decode_closelm_it4/cer_10:%CER 15.74 [ 1190 / 7562, 287 ins, 428 del, 475 sub ]
tri5a_pretrain-dbn_dnn_smbr2/decode_closelm_it5/cer_10:%CER 15.70 [ 1187 / 7562, 286 ins, 428 del, 473 sub ]
##### Below are the results of wide beam decoding #####
......
......@@ -16,22 +16,31 @@
# WORDX WORDY
# WORDX WORDY WORDZ
if($#ARGV+1 != 2 && $#ARGV+1 != 3) {
if(@ARGV < 2 || @ARGV > 4) {
printUsage();
exit;
}
$dictfile = shift @ARGV;
$inputfile = shift @ARGV;
$usespron=0;
if(@ARGV == 3) {
if($ARGV[2] ne "--spron") {
printUsage();
exit;
}
$usespron=1;
$mergeword=0;
$mergewordhypen=0;
while (@ARGV > 0) {
$param = shift @ARGV;
if($param eq "--spron") { $usespron=1; }
elsif ($param eq "--mergewords" ) { $mergeword = 1; }
elsif ($param eq "--mergewords_withhypen" ) { $mergewordhypen = 1; }
else { printUsage(); exit; }
}
$dictfile=$ARGV[0];
$inputfile=$ARGV[1];
if($mergeword==1 && $mergewordhypen==1) {
print "--mergewords option and --mergewords_withhypen option can not be used at the same time,\n";
print "please apply with only one of them.\n";
exit;
}
%dictionarylist=();
open(INFILE, $dictfile) || die("Can't open dict ".$dictfile."\n");
......@@ -51,6 +60,12 @@ open(INFILE, $inputfile) || die("Can't open wordlist ".$inputfile."\n");
while(<INFILE>) {
chomp;
$phrase = $_;
if($mergeword==1) {
$phrase =~ s/\s+//g;
}
elsif($mergewordhypen==1) {
$phrase =~ s/\s+/-/g;
}
@line = split(/\s+/);
## single pronunciation handling
......@@ -59,10 +74,7 @@ while(<INFILE>) {
next;
}
for($i=0; $i<scalar(@line); $i++) {
print $line[$i]." ";
}
print "\t";
print $phrase."\t";
for($i=0; $i<scalar(@line); $i++) {
if(!exists($dictionarylist{$line[$i]})) {
......@@ -123,8 +135,14 @@ while(<INFILE>) {
close(INFILE);
sub printUsage {
print "usage: perl hkust_extract_subdict.pl dict wordlist [--spron]\n\n";
print "### this script handle multiple pronunciations for dict in default\n";
print "### if you want to extract single(top) pronunciation from dict, please use the option --spron\n\n";
print "usage: perl hkust_extract_subdict.pl dict wordlist [--spron] [--mergewords | --mergewords_withhypen]\n\n";
print "### This script can output a subdict when a dictionary and a wordlist are supplied\n";
print "### This script can also generate dict entries for wordlist with multiple words in line\n\n";
print "### This script handles multiple pronunciations for dict by default.\n";
print "### If you want to extract single(top) pronunciation from dict, please use the option --spron\n\n";
print "### The --mergewords option is useful if you want to merge the multiple words to single phrase \n";
print " in output format (e.g. 特別 行政 區 => 特別行政區)\n";
print "### The --mergewords_withhypen option is the same as --mergewords option except it merges the \n";
print " multiple words with hypen in between (e.g. MACBOOK PRO => MACBOOK-PRO) in output format\n\n";
}
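For a quick feel for the two new options, here is a hypothetical snippet (not part of the script) showing what they do to one wordlist entry; the real code applies the substitution to the phrase it prints, while still looking up each original word in the dictionary.

# What --mergewords and --mergewords_withhypen do to a multi-word entry.
my $phrase = "MACBOOK PRO";
(my $merged   = $phrase) =~ s/\s+//g;    # --mergewords            => "MACBOOKPRO"
(my $hyphened = $phrase) =~ s/\s+/-/g;   # --mergewords_withhypen  => "MACBOOK-PRO"
print "$merged\n$hyphened\n";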
......@@ -71,3 +71,5 @@ acwt=0.1
data-fmllr-tri5a/train data/lang $srcdir ${srcdir}_ali ${srcdir}_denlats $dir || exit 1;
}
## The above process were repeated for 2048 neurons system as well (i.e. --hid-dim 2048), CE DNN => "exp/tri5a_pretrain-dbn2",sMBR => "exp/tri5a_pretrain-dbn_dnn_smbr2"
......@@ -70,4 +70,3 @@ if [ $stage -le 4 ]; then
steps/nnet2/decode.sh --config conf/decode.config --cmd "$decode_cmd" --nj 20 --feat-type raw \
exp/tri3b/graph_ug data/test_fbank exp/nnet4b/decode_ug
fi
#!/bin/bash
# This is neural net training on top of adapted 40-dimensional features.
# This version of the script uses GPUs. We distinguish it by putting "_gpu"
# at the end of the directory name.
#
# Since we're using one quarter the number of jobs (num-jobs-nnet) as the
# run_4c.sh script, we halve the learning rate (generally speaking, splitting
# the difference like this is probably a good idea.)
parallel_opts="-l gpu=1,hostname=g*" # This is suitable for the CLSP network, you'll likely have to change it.
. cmd.sh
( steps/nnet2/train_tanh.sh --num-epochs 20 --stage 23 \
--num-jobs-nnet 4 --num-threads 1 --parallel-opts "$parallel_opts" \
--num-epochs-extra 10 --add-layers-period 1 \
--num-hidden-layers 2 \
--mix-up 4000 \
--initial-learning-rate 0.01 --final-learning-rate 0.002 \
--cmd "$decode_cmd" \
--hidden-layer-dim 375 \
data/train data/lang exp/tri3b_ali exp/nnet4c_gpu
steps/nnet2/decode.sh --config conf/decode.config --cmd "$decode_cmd" --nj 20 \
--transform-dir exp/tri3b/decode \
exp/tri3b/graph data/test exp/nnet4c_gpu/decode
steps/nnet2/decode.sh --config conf/decode.config --cmd "$decode_cmd" --nj 20 \
--transform-dir exp/tri3b/decode_ug \
exp/tri3b/graph_ug data/test exp/nnet4c_gpu/decode_ug
)
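A back-of-the-envelope reading of the learning-rate comment at the top of this script; the run_4c.sh numbers (16 jobs, 0.02 initial rate) are inferred from "one quarter the number of jobs" and "halve the learning rate", not copied from that script, so treat this as an assumption.

# "Splitting the difference" between not scaling the rate at all and scaling it
# in proportion to the number of jobs amounts to scaling by sqrt(jobs_new/jobs_old):
# with 16 -> 4 jobs that is sqrt(1/4) = 1/2, i.e. 0.02 -> 0.01 as used above.
my ($jobs_old, $lr_old) = (16, 0.02);   # assumed run_4c.sh settings
my $jobs_new = 4;                       # --num-jobs-nnet above
my $lr_new   = $lr_old * sqrt($jobs_new / $jobs_old);
printf("initial learning rate: %g\n", $lr_new);   # prints 0.01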
......@@ -16,7 +16,7 @@ numGaussUBM=400
numLeavesSGMM=7000
numGaussSGMM=9000
decode_nj=20
decode_nj=5
train_nj=30
echo ============================================================================
......
......@@ -17,7 +17,7 @@
steps/train_sgmm.sh --cmd "$train_cmd" \
3500 10000 data/train_si84 data/lang exp/tri4b_ali_si84 \
exp/ubm5b/final.ubm exp/sgmm5a || exit 1;
exp/ubm5a/final.ubm exp/sgmm5a || exit 1;
(
utils/mkgraph.sh data/lang_test_tgpr exp/sgmm5a exp/sgmm5a/graph_tgpr
......@@ -110,4 +110,3 @@ steps/train_sgmm.sh --cmd "$train_cmd" \
# Decode from lattices in exp/sgmm5a/decode_tgpr_dev93.
steps/decode_sgmm_fromlats.sh --cmd "$decode_cmd" --transform-dir exp/tri4b/decode_tgpr_dev93 \
data/test_dev93 data/lang_test_tgpr exp/sgmm5a/decode_tgpr_dev93 exp/sgmm5c/decode_tgpr_dev93
......@@ -330,6 +330,7 @@ local/run_dnn.sh
# KWS setup. We leave it commented out by default
# $duration is the length of the search collection, in seconds
#duration=`feat-to-len scp:data/test_eval92/feats.scp ark,t:- | awk '{x+=$2} END{print x/100;}'`
#local/generate_example_kws.sh data/test_eval92/ data/kws/
......
......@@ -2,7 +2,6 @@
# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0.
# This script trains a fairly vanilla network with tanh nonlinearities.
# Begin configuration section.
......@@ -268,7 +267,7 @@ while [ $x -lt $num_iters ]; do
nnet-compute-prob $dir/$x.mdl ark:$egs_dir/train_diagnostic.egs &
if [ $x -gt 0 ] && [ ! -f $dir/log/mix_up.$[$x-1].log ]; then
$cmd $dir/log/progress.$x.log \
nnet-show-progress $dir/$[$x-1].mdl $dir/$x.mdl ark:$egs_dir/train_diagnostic.egs &
nnet-show-progress --use-gpu=no $dir/$[$x-1].mdl $dir/$x.mdl ark:$egs_dir/train_diagnostic.egs &
fi
echo "Training neural net (pass $x)"
......
......@@ -67,7 +67,7 @@ fi
if [ -f $srcdir/segments ]; then
utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/segments >$srcdir/segments
utils/apply_map.pl -f 1 $destdir/utt_map <$srcdir/segments >$destdir/segments
cp $srcdir/wav.scp $destdir
if [ -f $srcdir/reco2file_and_channel ]; then
cp $srcdir/reco2file_and_channel $destdir/
......
......@@ -138,7 +138,7 @@ int main(int argc, char *argv[]) {
for (std::map<EventType, GaussClusterable*>::const_iterator iter = tree_stats.begin();
iter != tree_stats.end();
iter++ ) {
stats.push_back(std::make_pair<EventType, GaussClusterable*>(iter->first, iter->second));
stats.push_back(std::make_pair(iter->first, iter->second));
}
tree_stats.clear();
......