Commit 0827e6ad authored by Dan Povey
Browse files

Cosmetic fix to rm_format_data.sh

git-svn-id: https://svn.code.sf.net/p/kaldi/code/trunk@609 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
parent 739ed027
@@ -30,8 +30,6 @@ done
 # Copy stuff into its final location:
 for x in $data_list; do
 cp data/local/$x.spk2utt data/$x/spk2utt || exit 1;
 cp data/local/$x.utt2spk data/$x/utt2spk || exit 1;
@@ -44,6 +42,7 @@ done
 scripts/make_words_symtab.pl < data/local/G.txt > data/lang/words.txt
 scripts/make_phones_symtab.pl < data/local/lexicon.txt > data/lang/phones.txt
+cp data/lang/words.txt data/lang_test/words.txt
 silphones="sil"; # This would in general be a space-separated list of all silence phones. E.g. "sil vn"
 # Generate colon-separated lists of silence and non-silence phones.
@@ -67,17 +66,18 @@ scripts/make_lexicon_fst.pl data/local/lexicon.txt $silprob sil | \
 # ever need to e.g. create ctm's-- these are used to work out the
 # word boundaries.
 cat data/local/lexicon.txt | \
 awk '{printf("%s #1 ", $1); for (n=2; n <= NF; n++) { printf("%s ", $n); } print "#2"; }' | \
 scripts/make_lexicon_fst.pl - 0.5 sil | \
-fstcompile --isymbols=data/lang_test/phones_disambig.txt --osymbols=data/lang/words.txt \
+fstcompile --isymbols=data/lang_test/phones_disambig.txt --osymbols=data/lang_test/words.txt \
 --keep_isymbols=false --keep_osymbols=false | \
 fstarcsort --sort_type=olabel > data/lang_test/L_align.fst
 # L_disambig.fst has the disambiguation symbols (c.f. Mohri's papers)
 scripts/make_lexicon_fst.pl data/local/lexicon_disambig.txt $silprob sil '#'$ndisambig | \
-fstcompile --isymbols=data/lang_test/phones_disambig.txt --osymbols=data/lang/words.txt \
+fstcompile --isymbols=data/lang_test/phones_disambig.txt --osymbols=data/lang_test/words.txt \
 --keep_isymbols=false --keep_osymbols=false | fstarcsort --sort_type=olabel \
 > data/lang_test/L_disambig.fst
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment