Commit 029582ba authored by Dan Povey
Browse files

Minor script fixes

git-svn-id: https://svn.code.sf.net/p/kaldi/code/trunk@605 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
parent d13597fd
......@@ -80,7 +80,7 @@ cat $dir/unigrams | awk -v dict=$dir/dict.cmu \
'BEGIN{while(getline<dict) seen[$1]=1;} {if(!seen[$2]){print;}}' \
> $dir/oov.counts
echo "Most frequent unseen unigrams d are: "
echo "Most frequent unseen unigrams are: "
head $dir/oov.counts
# Prune away singleton counts, and remove things with numbers in
......@@ -143,15 +143,17 @@ cat $dir/dict.acronyms $dir/dict.oovs | sort | uniq > $dir/dict.oovs_merged
awk '{print $1}' $dir/dict.oovs_merged | uniq > $dir/oovlist.handled
sort $dir/oovlist | diff - $dir/oovlist.handled | grep -v 'd' | sed 's:< ::' > $dir/oovlist.not_handled
echo "**Top OOVs we handled are:**";
head $dir/oovlist.handled
echo "**Top OOVs we didn't handle are as follows (not: they are mostly misspellings):**";
head $dir/oovlist.not_handled
# add_counts.pl attaches the original counts to the lists of handled/not-handled OOVs
add_counts.pl $dir/oov.counts $dir/oovlist.handled | sort -nr > $dir/oovlist.handled.counts
add_counts.pl $dir/oov.counts $dir/oovlist.not_handled | sort -nr > $dir/oovlist.not_handled.counts
echo "**Top OOVs we handled are:**";
head $dir/oovlist.handled.counts
echo "**Top OOVs we didn't handle are as follows (note: they are mostly misspellings):**";
head $dir/oovlist.not_handled.counts
echo "Count of OOVs we handled is `awk '{x+=$1} END{print x}' $dir/oovlist.handled.counts`"
echo "Count of OOVs we couldn't handle is `awk '{x+=$1} END{print x}' $dir/oovlist.not_handled.counts`"
echo "Count of OOVs we didn't handle due to low count is" \
......
......@@ -82,7 +82,7 @@ steps/train_mono.sh --num-jobs 10 --cmd "$train_cmd" \
data/train_si84_2kshort data/lang exp/mono0a
(
scripts/mkgraph.sh --mono data/lang_test_fgpr exp/mono0a exp/mono0a/graph_fgpr
scripts/mkgraph.sh --mono data/lang_test_tgpr exp/mono0a exp/mono0a/graph_tgpr
scripts/decode.sh --cmd "$decode_cmd" steps/decode_deltas.sh exp/mono0a/graph_tgpr data/test_dev93 exp/mono0a/decode_tgpr_dev93
scripts/decode.sh --cmd "$decode_cmd" steps/decode_deltas.sh exp/mono0a/graph_tgpr data/test_eval92 exp/mono0a/decode_tgpr_eval92
)&
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment