Commit 6122c551 authored by Dan Povey
Browse files

Fixes and updates to HTK conversion script.

git-svn-id: https://svn.code.sf.net/p/kaldi/code/trunk@961 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
parent 09edc91f
......@@ -5,7 +5,23 @@ script is not very general-- since HTK's MMF format can contain almost
anything, it's hard to create a general solution.
# HTK conversion script:
./convert_htk.sh ../.. /mnt/matylda5/jhu09/setup/CH1/English/exp/xwrd.R0_800_TB500/hmm84/MMF /mnt/matylda5/jhu09/setup/CH1/English/exp/xwrd.R0_800_TB500/hmm10_800_500/cluster.trees convert_dir
# This example for the conversion was kindly provided by Yanqing Sun. However, we
# are not sure that it has been correctly converted. We are providing it as an
# example anyway-- perhaps someone can debug it. It has been our experience
# when converting HTK models that whenever we get the conversion script working,
# some other HTK model comes along and breaks the script. In general this
# conversion is something that we don't aim to put a lot of effort into
# supporting-- our attitude is, if you like HTK, then use HTK.
wget --no-check-certificate https://sourceforge.net/projects/kaldi/files/wsj_ascii.tar.gz
tar -xvzf wsj_ascii.tar.gz
./convert_htk.sh --no-cleanup ../.. ascii/hmmdefs trees kaldi
# on BUT system:
#./convert_htk.sh ../.. /mnt/matylda5/jhu09/setup/CH1/English/exp/xwrd.R0_800_TB500/hmm84/MMF /mnt/matylda5/jhu09/setup/CH1/English/exp/xwrd.R0_800_TB500/hmm10_800_500/cluster.trees convert_dir
# can add optional args --no-cleanup and --linear-topology
......
......@@ -109,7 +109,7 @@ dim=`grep -w MEAN $mmf | head -1 | awk '{print $2}'` # probably 39.
convert_states.pl $dim < states2.txt > kaldi.am_gmm
gmm-init-trans kaldi.topo kaldi.am_gmm kaldi.tree kaldi.mdl || exit 1;
gmm-init-trans --binary=false kaldi.topo kaldi.am_gmm kaldi.tree kaldi.mdl || exit 1;
# clean up:
......
......@@ -37,7 +37,7 @@ for($pdf = 0; $pdf < $numpdfs; $pdf++) {
defined $nummix{$pdf} || die "No nummix defined for pdf = $pdf\n";
print " <DiagGMMBegin>\n";
$nm = $nummix{$pdf};
print " <WEIGHTS> FV $nm [ ";
print " <WEIGHTS> [ ";
for($n = 0; $n < $nm; $n++) { print "$weight{$pdf,$n} "; }
print "]\n";
print " <MEANS_INVVARS> [\n";
......
......@@ -47,6 +47,8 @@ while(<>) {
if(m/~h \"(.+)\"/) {
$phone = $1; # in context currently.
if($phone =~ m:.+\-(.+)\+.+: ) { $phone = $1; } # Remove context.
elsif ($phone =~ m:.+\-(.+): ) { $phone = $1; }
elsif ($phone =~ m:(.+)\+.+: ) { $phone = $1; }
while(<>) {
if(m/\<ENDHMM\>/) { last; } # no longer parsing this HMM.
elsif(m/~t \"(.+)\"/) {
......@@ -62,13 +64,14 @@ while(<>) {
if(!defined $phone2trans{$phone}) {
$phone2trans{$phone} = ReadNLines($nlines);
} else {
$phone2trans{$phone} eq ReadNLines($nlines) || print STDERR "Conflicting definitions for transition matrix for phone $phone: this conversion program will give you the wrong answer.\n";
$v = ReadNLines($nlines);
$phone2trans{$phone} eq $v || print STDERR "Conflicting definitions for transition matrix for phone $phone: this conversion program will give you the wrong answer: $phone2trans{$phone} versus $v\n";
}
}
}
}
if(m/\~t \"(.+)\"/) { # defining a ransition macro...
if(m/\~t \"(.+)\"/) { # defining a transition macro...
$macroname = $1;
($tok,$n) = split(" ", <>); # Split the line like <TRANSP> 5
$tok == "<TRANSP>" || die "Bad line $. in MMF\n";
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment