Commit 6423ac8d authored by Jan Trmal
Browse files

Adding IRSTLM presence checks

parent 0b52c1d5
...@@ -27,6 +27,19 @@ esac ...@@ -27,6 +27,19 @@ esac
# Load previous / store the new AMI_DIR location, # Load previous / store the new AMI_DIR location,
[ -r conf/ami_dir ] && AMI_DIR=$(cat conf/ami_dir) || echo $AMI_DIR >conf/ami_dir [ -r conf/ami_dir ] && AMI_DIR=$(cat conf/ami_dir) || echo $AMI_DIR >conf/ami_dir
if [ -z $IRSTLM ] ; then
export IRSTLM=$KALDI_ROOT/tools/irstlm/
fi
export PATH=${PATH}:$IRSTLM/bin
if ! command -v prune-lm >/dev/null 2>&1 ; then
echo "$0: Error: the IRSTLM is not available or compiled" >&2
echo "$0: Error: We used to install it by default, but." >&2
echo "$0: Error: this is no longer the case." >&2
echo "$0: Error: To install it, go to $KALDI_ROOT/tools" >&2
echo "$0: Error: and run extras/install_irstlm.sh" >&2
exit 1
fi
# Set bash to 'debug' mode, it will exit on : # Set bash to 'debug' mode, it will exit on :
# -e 'error', -u 'undefined variable', -o ... 'error in pipeline', -x 'print commands', # -e 'error', -u 'undefined variable', -o ... 'error in pipeline', -x 'print commands',
set -e set -e
......
...@@ -22,13 +22,25 @@ local=`pwd`/local ...@@ -22,13 +22,25 @@ local=`pwd`/local
utils=`pwd`/utils utils=`pwd`/utils
. ./path.sh # Needed for KALDI_ROOT . ./path.sh # Needed for KALDI_ROOT
export PATH=$PATH:$KALDI_ROOT/tools/irstlm/bin
sph2pipe=$KALDI_ROOT/tools/sph2pipe_v2.5/sph2pipe sph2pipe=$KALDI_ROOT/tools/sph2pipe_v2.5/sph2pipe
if [ ! -x $sph2pipe ]; then if [ ! -x $sph2pipe ]; then
echo "Could not find (or execute) the sph2pipe program at $sph2pipe"; echo "Could not find (or execute) the sph2pipe program at $sph2pipe";
exit 1; exit 1;
fi fi
if [ -z $IRSTLM ] ; then
export IRSTLM=$KALDI_ROOT/tools/irstlm/
fi
export PATH=${PATH}:$IRSTLM/bin
if ! command -v prune-lm >/dev/null 2>&1 ; then
echo "$0: Error: the IRSTLM is not available or compiled" >&2
echo "$0: Error: We used to install it by default, but." >&2
echo "$0: Error: this is no longer the case." >&2
echo "$0: Error: To install it, go to $KALDI_ROOT/tools" >&2
echo "$0: Error: and run extras/install_irstlm.sh" >&2
exit 1
fi
cd $dir cd $dir
# SI-84 clean training data # SI-84 clean training data
......
...@@ -25,13 +25,25 @@ local=`pwd`/local ...@@ -25,13 +25,25 @@ local=`pwd`/local
utils=`pwd`/utils utils=`pwd`/utils
. ./path.sh # Needed for KALDI_ROOT . ./path.sh # Needed for KALDI_ROOT
export PATH=$PATH:$KALDI_ROOT/tools/irstlm/bin
sph2pipe=$KALDI_ROOT/tools/sph2pipe_v2.5/sph2pipe sph2pipe=$KALDI_ROOT/tools/sph2pipe_v2.5/sph2pipe
if [ ! -x $sph2pipe ]; then if [ ! -x $sph2pipe ]; then
echo "Could not find (or execute) the sph2pipe program at $sph2pipe"; echo "Could not find (or execute) the sph2pipe program at $sph2pipe";
exit 1; exit 1;
fi fi
if [ -z $IRSTLM ] ; then
export IRSTLM=$KALDI_ROOT/tools/irstlm/
fi
export PATH=${PATH}:$IRSTLM/bin
if ! command -v prune-lm >/dev/null 2>&1 ; then
echo "$0: Error: the IRSTLM is not available or compiled" >&2
echo "$0: Error: We used to install it by default, but." >&2
echo "$0: Error: this is no longer the case." >&2
echo "$0: Error: To install it, go to $KALDI_ROOT/tools" >&2
echo "$0: Error: and run extras/install_irstlm.sh" >&2
exit 1
fi
cd $dir cd $dir
# This version for SI-84 # This version for SI-84
......
...@@ -25,13 +25,25 @@ local=`pwd`/local ...@@ -25,13 +25,25 @@ local=`pwd`/local
utils=`pwd`/utils utils=`pwd`/utils
. ./path.sh # Needed for KALDI_ROOT . ./path.sh # Needed for KALDI_ROOT
export PATH=$PATH:$KALDI_ROOT/tools/irstlm/bin
sph2pipe=$KALDI_ROOT/tools/sph2pipe_v2.5/sph2pipe sph2pipe=$KALDI_ROOT/tools/sph2pipe_v2.5/sph2pipe
if [ ! -x $sph2pipe ]; then if [ ! -x $sph2pipe ]; then
echo "Could not find (or execute) the sph2pipe program at $sph2pipe"; echo "Could not find (or execute) the sph2pipe program at $sph2pipe";
exit 1; exit 1;
fi fi
if [ -z $IRSTLM ] ; then
export IRSTLM=$KALDI_ROOT/tools/irstlm/
fi
export PATH=${PATH}:$IRSTLM/bin
if ! command -v prune-lm >/dev/null 2>&1 ; then
echo "$0: Error: the IRSTLM is not available or compiled" >&2
echo "$0: Error: We used to install it by default, but." >&2
echo "$0: Error: this is no longer the case." >&2
echo "$0: Error: To install it, go to $KALDI_ROOT/tools" >&2
echo "$0: Error: and run extras/install_irstlm.sh" >&2
exit 1
fi
cd $dir cd $dir
# This version for SI-84 # This version for SI-84
......
...@@ -28,13 +28,25 @@ local=`pwd`/local ...@@ -28,13 +28,25 @@ local=`pwd`/local
utils=`pwd`/utils utils=`pwd`/utils
. ./path.sh # Needed for KALDI_ROOT . ./path.sh # Needed for KALDI_ROOT
export PATH=$PATH:$KALDI_ROOT/tools/irstlm/bin
sph2pipe=$KALDI_ROOT/tools/sph2pipe_v2.5/sph2pipe sph2pipe=$KALDI_ROOT/tools/sph2pipe_v2.5/sph2pipe
if [ ! -x $sph2pipe ]; then if [ ! -x $sph2pipe ]; then
echo "Could not find (or execute) the sph2pipe program at $sph2pipe"; echo "Could not find (or execute) the sph2pipe program at $sph2pipe";
exit 1; exit 1;
fi fi
if [ -z $IRSTLM ] ; then
export IRSTLM=$KALDI_ROOT/tools/irstlm/
fi
export PATH=${PATH}:$IRSTLM/bin
if ! command -v prune-lm >/dev/null 2>&1 ; then
echo "$0: Error: the IRSTLM is not available or compiled" >&2
echo "$0: Error: We used to install it by default, but." >&2
echo "$0: Error: this is no longer the case." >&2
echo "$0: Error: To install it, go to $KALDI_ROOT/tools" >&2
echo "$0: Error: and run extras/install_irstlm.sh" >&2
exit 1
fi
cd $dir cd $dir
# This version for SI-84 # This version for SI-84
......
...@@ -25,13 +25,25 @@ local=`pwd`/local ...@@ -25,13 +25,25 @@ local=`pwd`/local
utils=`pwd`/utils utils=`pwd`/utils
. ./path.sh # Needed for KALDI_ROOT . ./path.sh # Needed for KALDI_ROOT
export PATH=$PATH:$KALDI_ROOT/tools/irstlm/bin
sph2pipe=$KALDI_ROOT/tools/sph2pipe_v2.5/sph2pipe sph2pipe=$KALDI_ROOT/tools/sph2pipe_v2.5/sph2pipe
if [ ! -x $sph2pipe ]; then if [ ! -x $sph2pipe ]; then
echo "Could not find (or execute) the sph2pipe program at $sph2pipe"; echo "Could not find (or execute) the sph2pipe program at $sph2pipe";
exit 1; exit 1;
fi fi
if [ -z $IRSTLM ] ; then
export IRSTLM=$KALDI_ROOT/tools/irstlm/
fi
export PATH=${PATH}:$IRSTLM/bin
if ! command -v prune-lm >/dev/null 2>&1 ; then
echo "$0: Error: the IRSTLM is not available or compiled" >&2
echo "$0: Error: We used to install it by default, but." >&2
echo "$0: Error: this is no longer the case." >&2
echo "$0: Error: To install it, go to $KALDI_ROOT/tools" >&2
echo "$0: Error: and run extras/install_irstlm.sh" >&2
exit 1
fi
cd $dir cd $dir
# This version for SI-84 # This version for SI-84
......
...@@ -63,8 +63,8 @@ cat $dir/nonsilence_phones.txt | perl -e 'while(<>){ foreach $p (split(" ", $_)) ...@@ -63,8 +63,8 @@ cat $dir/nonsilence_phones.txt | perl -e 'while(<>){ foreach $p (split(" ", $_))
if [ -z $IRSTLM ] ; then if [ -z $IRSTLM ] ; then
export IRSTLM=$KALDI_ROOT/tools/irstlm/ export IRSTLM=$KALDI_ROOT/tools/irstlm/
fi fi
export PATH=${PATH}:$IRSTLM/bin
if [ ! -f $IRSTLM/bin/dict ] ; then if ! command -v prune-lm >/dev/null 2>&1 ; then
echo "$0: Error: the IRSTLM is not available or compiled" >&2 echo "$0: Error: the IRSTLM is not available or compiled" >&2
echo "$0: Error: We used to install it by default, but." >&2 echo "$0: Error: We used to install it by default, but." >&2
echo "$0: Error: this is no longer the case." >&2 echo "$0: Error: this is no longer the case." >&2
...@@ -76,10 +76,10 @@ fi ...@@ -76,10 +76,10 @@ fi
cut -d' ' -f2- $srcdir/text | sed -e 's:^:<s> :' -e 's:$: </s>:' \ cut -d' ' -f2- $srcdir/text | sed -e 's:^:<s> :' -e 's:$: </s>:' \
> $srcdir/lm_train > $srcdir/lm_train
$IRSTLM/bin/build-lm.sh -i $srcdir/lm_train -n 2 \ build-lm.sh -i $srcdir/lm_train -n 2 \
-o $tmpdir/lm_phone_bg.ilm.gz -o $tmpdir/lm_phone_bg.ilm.gz
$IRSTLM/bin/compile-lm $tmpdir/lm_phone_bg.ilm.gz -t=yes /dev/stdout | \ compile-lm $tmpdir/lm_phone_bg.ilm.gz -t=yes /dev/stdout | \
grep -v unk | gzip -c > $lmdir/lm_phone_bg.arpa.gz grep -v unk | gzip -c > $lmdir/lm_phone_bg.arpa.gz
......
...@@ -52,6 +52,19 @@ do ...@@ -52,6 +52,19 @@ do
esac esac
done done
if [ -z $IRSTLM ] ; then
export IRSTLM=$KALDI_ROOT/tools/irstlm/
fi
export PATH=${PATH}:$IRSTLM/bin
if ! command -v prune-lm >/dev/null 2>&1 ; then
echo "$0: Error: the IRSTLM is not available or compiled" >&2
echo "$0: Error: We used to install it by default, but." >&2
echo "$0: Error: this is no longer the case." >&2
echo "$0: Error: To install it, go to $KALDI_ROOT/tools" >&2
echo "$0: Error: and run extras/install_irstlm.sh" >&2
exit 1
fi
cd $WDIR; cd $WDIR;
tmpdir=$(mktemp -d); tmpdir=$(mktemp -d);
trap 'rm -rf "$tmpdir"' EXIT trap 'rm -rf "$tmpdir"' EXIT
......
...@@ -49,6 +49,19 @@ while [ $# -gt 0 ]; do ...@@ -49,6 +49,19 @@ while [ $# -gt 0 ]; do
esac esac
done done
if [ -z $IRSTLM ] ; then
export IRSTLM=$KALDI_ROOT/tools/irstlm/
fi
export PATH=${PATH}:$IRSTLM/bin
if ! command -v prune-lm >/dev/null 2>&1 ; then
echo "$0: Error: the IRSTLM is not available or compiled" >&2
echo "$0: Error: We used to install it by default, but." >&2
echo "$0: Error: this is no longer the case." >&2
echo "$0: Error: To install it, go to $KALDI_ROOT/tools" >&2
echo "$0: Error: and run extras/install_irstlm.sh" >&2
exit 1
fi
for L in $LANGUAGES; do for L in $LANGUAGES; do
lm=$LMDIR/${L}.3gram.lm.gz lm=$LMDIR/${L}.3gram.lm.gz
[ -f $lm ] || { echo "LM '$lm' not found"; exit 1; } [ -f $lm ] || { echo "LM '$lm' not found"; exit 1; }
......
...@@ -12,6 +12,19 @@ exit 1; ...@@ -12,6 +12,19 @@ exit 1;
. cmd.sh . cmd.sh
. path.sh . path.sh
if [ -z $IRSTLM ] ; then
export IRSTLM=$KALDI_ROOT/tools/irstlm/
fi
export PATH=${PATH}:$IRSTLM/bin
if ! command -v prune-lm >/dev/null 2>&1 ; then
echo "$0: Error: the IRSTLM is not available or compiled" >&2
echo "$0: Error: We used to install it by default, but." >&2
echo "$0: Error: this is no longer the case." >&2
echo "$0: Error: To install it, go to $KALDI_ROOT/tools" >&2
echo "$0: Error: and run extras/install_irstlm.sh" >&2
exit 1
fi
# Data prep # Data prep
# Here we make some Edinburgh-specific changes from the Kaldi recipe in # Here we make some Edinburgh-specific changes from the Kaldi recipe in
# trunk/egs/swbd/s5 (rev. 1841). The major differences are that everything is # trunk/egs/swbd/s5 (rev. 1841). The major differences are that everything is
...@@ -47,7 +60,8 @@ utils/format_lm_sri.sh --srilm-opts "$srilm_opts" \ ...@@ -47,7 +60,8 @@ utils/format_lm_sri.sh --srilm-opts "$srilm_opts" \
data/lang $LM data/local/dict/lexicon.txt data/lang_sw1_fsh_tg data/lang $LM data/local/dict/lexicon.txt data/lang_sw1_fsh_tg
# For some funny reason we are still using IRSTLM for doing LM pruning :) # For some funny reason we are still using IRSTLM for doing LM pruning :)
prune-lm --threshold=1e-7 data/local/lm/sw1_fsh.o3g.kn.gz /dev/stdout \ prune-lm --threshold=1e-7 data/local/lm/sw1_fsh.o3g.kn.gz \
/dev/stdout \
| gzip -c > data/local/lm/sw1_fsh.o3g.pr1-7.kn.gz | gzip -c > data/local/lm/sw1_fsh.o3g.pr1-7.kn.gz
LM=data/local/lm/sw1_fsh.o3g.pr1-7.kn.gz LM=data/local/lm/sw1_fsh.o3g.pr1-7.kn.gz
utils/format_lm_sri.sh --srilm-opts "$srilm_opts" \ utils/format_lm_sri.sh --srilm-opts "$srilm_opts" \
......
...@@ -18,6 +18,19 @@ set -e # exit on error ...@@ -18,6 +18,19 @@ set -e # exit on error
# want to store MFCC features. # want to store MFCC features.
mfccdir=mfcc mfccdir=mfcc
if [ -z $IRSTLM ] ; then
export IRSTLM=$KALDI_ROOT/tools/irstlm/
fi
export PATH=${PATH}:$IRSTLM/bin
if ! command -v prune-lm >/dev/null 2>&1 ; then
echo "$0: Error: the IRSTLM is not available or compiled" >&2
echo "$0: Error: We used to install it by default, but." >&2
echo "$0: Error: this is no longer the case." >&2
echo "$0: Error: To install it, go to $KALDI_ROOT/tools" >&2
echo "$0: Error: and run extras/install_irstlm.sh" >&2
exit 1
fi
# Prepare Switchboard data. This command can also take a second optional argument # Prepare Switchboard data. This command can also take a second optional argument
# which specifies the directory to Switchboard documentations. Specifically, if # which specifies the directory to Switchboard documentations. Specifically, if
...@@ -54,6 +67,7 @@ local/swbd1_train_lms.sh data/local/train/text \ ...@@ -54,6 +67,7 @@ local/swbd1_train_lms.sh data/local/train/text \
data/local/dict/lexicon.txt data/local/lm $fisher_dirs data/local/dict/lexicon.txt data/local/lm $fisher_dirs
# We don't really need all these options for SRILM, since the LM training script # We don't really need all these options for SRILM, since the LM training script
# does some of the same processings (e.g. -subset -tolower) # does some of the same processings (e.g. -subset -tolower)
for order in 3 4; do for order in 3 4; do
lm_suffix="tg" lm_suffix="tg"
[ $order -eq 3 ] || lm_suffix="fg" [ $order -eq 3 ] || lm_suffix="fg"
...@@ -66,7 +80,6 @@ for order in 3 4; do ...@@ -66,7 +80,6 @@ for order in 3 4; do
utils/build_const_arpa_lm.sh $LM data/lang data/lang_sw1_fsh_$lm_suffix utils/build_const_arpa_lm.sh $LM data/lang data/lang_sw1_fsh_$lm_suffix
# For some funny reason we are still using IRSTLM for doing LM pruning :) # For some funny reason we are still using IRSTLM for doing LM pruning :)
export PATH=$PATH:../../../tools/irstlm/bin/
prune-lm --threshold=1e-7 data/local/lm/sw1_fsh.o${order}g.kn.gz /dev/stdout \ prune-lm --threshold=1e-7 data/local/lm/sw1_fsh.o${order}g.kn.gz /dev/stdout \
| gzip -c > data/local/lm/sw1_fsh.o${order}g.pr1-7.kn.gz || exit 1 | gzip -c > data/local/lm/sw1_fsh.o${order}g.pr1-7.kn.gz || exit 1
LM=data/local/lm/sw1_fsh.o${order}g.pr1-7.kn.gz LM=data/local/lm/sw1_fsh.o${order}g.pr1-7.kn.gz
......
...@@ -100,8 +100,8 @@ cut -f1 data/local/lexicon.txt \ ...@@ -100,8 +100,8 @@ cut -f1 data/local/lexicon.txt \
if [ -z $IRSTLM ] ; then if [ -z $IRSTLM ] ; then
export IRSTLM=$KALDI_ROOT/tools/irstlm/ export IRSTLM=$KALDI_ROOT/tools/irstlm/
fi fi
export PATH=${PATH}:$IRSTLM/bin
if [ ! -f $IRSTLM/bin/dict ] ; then if ! command -v prune-lm >/dev/null 2>&1 ; then
echo "$0: Error: the IRSTLM is not available or compiled" >&2 echo "$0: Error: the IRSTLM is not available or compiled" >&2
echo "$0: Error: We used to install it by default, but." >&2 echo "$0: Error: We used to install it by default, but." >&2
echo "$0: Error: this is no longer the case." >&2 echo "$0: Error: this is no longer the case." >&2
...@@ -116,10 +116,10 @@ cut -d' ' -f2- $srcdir/text | sed -e 's:^:<s> :' -e 's:$: </s>:' \ ...@@ -116,10 +116,10 @@ cut -d' ' -f2- $srcdir/text | sed -e 's:^:<s> :' -e 's:$: </s>:' \
cut -d' ' -f2- data/local/train.trans2 | sed -e 's:^:<s> :' -e 's:$: </s>:' \ cut -d' ' -f2- data/local/train.trans2 | sed -e 's:^:<s> :' -e 's:$: </s>:' \
> data/local/lm_train.txt > data/local/lm_train.txt
$IRSTLM/bin/build-lm.sh -i data/local/lm_train.txt -n 2 \ build-lm.sh -i data/local/lm_train.txt -n 2 \
-o data/local/lm_phone_bg.ilm.gz -o data/local/lm_phone_bg.ilm.gz
$IRSTLM/bin/compile-lm data/local/lm_phone_bg.ilm.gz --text yes /dev/stdout \ compile-lm data/local/lm_phone_bg.ilm.gz --text yes /dev/stdout \
| grep -v unk | gzip -c > data/local/lm_phone_bg.arpa.gz | grep -v unk | gzip -c > data/local/lm_phone_bg.arpa.gz
) >& data/prepare_lm.log ) >& data/prepare_lm.log
......
...@@ -64,8 +64,8 @@ cat $dir/nonsilence_phones.txt | perl -e 'while(<>){ foreach $p (split(" ", $_)) ...@@ -64,8 +64,8 @@ cat $dir/nonsilence_phones.txt | perl -e 'while(<>){ foreach $p (split(" ", $_))
if [ -z $IRSTLM ] ; then if [ -z $IRSTLM ] ; then
export IRSTLM=$KALDI_ROOT/tools/irstlm/ export IRSTLM=$KALDI_ROOT/tools/irstlm/
fi fi
export PATH=${PATH}:$IRSTLM/bin
if [ ! -f $IRSTLM/bin/dict ] ; then if ! command -v prune-lm >/dev/null 2>&1 ; then
echo "$0: Error: the IRSTLM is not available or compiled" >&2 echo "$0: Error: the IRSTLM is not available or compiled" >&2
echo "$0: Error: We used to install it by default, but." >&2 echo "$0: Error: We used to install it by default, but." >&2
echo "$0: Error: this is no longer the case." >&2 echo "$0: Error: this is no longer the case." >&2
...@@ -77,10 +77,10 @@ fi ...@@ -77,10 +77,10 @@ fi
cut -d' ' -f2- $srcdir/train.text | sed -e 's:^:<s> :' -e 's:$: </s>:' \ cut -d' ' -f2- $srcdir/train.text | sed -e 's:^:<s> :' -e 's:$: </s>:' \
> $srcdir/lm_train.text > $srcdir/lm_train.text
$IRSTLM/bin/build-lm.sh -i $srcdir/lm_train.text -n 2 \ build-lm.sh -i $srcdir/lm_train.text -n 2 \
-o $tmpdir/lm_phone_bg.ilm.gz -o $tmpdir/lm_phone_bg.ilm.gz
$IRSTLM/bin/compile-lm $tmpdir/lm_phone_bg.ilm.gz -t=yes /dev/stdout | \ compile-lm $tmpdir/lm_phone_bg.ilm.gz -t=yes /dev/stdout | \
grep -v unk | gzip -c > $lmdir/lm_phone_bg.arpa.gz grep -v unk | gzip -c > $lmdir/lm_phone_bg.arpa.gz
echo "Dictionary & language model preparation succeeded" echo "Dictionary & language model preparation succeeded"
...@@ -25,13 +25,25 @@ local=`pwd`/local ...@@ -25,13 +25,25 @@ local=`pwd`/local
utils=`pwd`/utils utils=`pwd`/utils
. ./path.sh # Needed for KALDI_ROOT . ./path.sh # Needed for KALDI_ROOT
export PATH=$PATH:$KALDI_ROOT/tools/irstlm/bin
sph2pipe=$KALDI_ROOT/tools/sph2pipe_v2.5/sph2pipe sph2pipe=$KALDI_ROOT/tools/sph2pipe_v2.5/sph2pipe
if [ ! -x $sph2pipe ]; then if [ ! -x $sph2pipe ]; then
echo "Could not find (or execute) the sph2pipe program at $sph2pipe"; echo "Could not find (or execute) the sph2pipe program at $sph2pipe";
exit 1; exit 1;
fi fi
if [ -z $IRSTLM ] ; then
export IRSTLM=$KALDI_ROOT/tools/irstlm/
fi
export PATH=${PATH}:$IRSTLM/bin
if ! command -v prune-lm >/dev/null 2>&1 ; then
echo "$0: Error: the IRSTLM is not available or compiled" >&2
echo "$0: Error: We used to install it by default, but." >&2
echo "$0: Error: this is no longer the case." >&2
echo "$0: Error: To install it, go to $KALDI_ROOT/tools" >&2
echo "$0: Error: and run extras/install_irstlm.sh" >&2
exit 1
fi
cd $dir cd $dir
# This version for SI-84 # This version for SI-84
......
...@@ -17,15 +17,26 @@ local=`pwd`/local ...@@ -17,15 +17,26 @@ local=`pwd`/local
utils=`pwd`/utils utils=`pwd`/utils
. ./path.sh # Needed for KALDI_ROOT . ./path.sh # Needed for KALDI_ROOT
export PATH=$PATH:$KALDI_ROOT/tools/irstlm/bin
sph2pipe=$KALDI_ROOT/tools/sph2pipe_v2.5/sph2pipe sph2pipe=$KALDI_ROOT/tools/sph2pipe_v2.5/sph2pipe
if [ ! -x $sph2pipe ]; then if [ ! -x $sph2pipe ]; then
echo "Could not find (or execute) the sph2pipe program at $sph2pipe"; echo "Could not find (or execute) the sph2pipe program at $sph2pipe";
exit 1; exit 1;
fi fi
cd $dir if [ -z $IRSTLM ] ; then
export IRSTLM=$KALDI_ROOT/tools/irstlm/
fi
export PATH=${PATH}:$IRSTLM/bin
if ! command -v prune-lm >/dev/null 2>&1 ; then
echo "$0: Error: the IRSTLM is not available or compiled" >&2
echo "$0: Error: We used to install it by default, but." >&2
echo "$0: Error: this is no longer the case." >&2
echo "$0: Error: To install it, go to $KALDI_ROOT/tools" >&2
echo "$0: Error: and run extras/install_irstlm.sh" >&2
exit 1
fi
cd $dir
# Make directory of links to the WSJ disks such as 11-13.1. This relies on the command # Make directory of links to the WSJ disks such as 11-13.1. This relies on the command
# line arguments being absolute pathnames. # line arguments being absolute pathnames.
rm -r links/ 2>/dev/null rm -r links/ 2>/dev/null
......
...@@ -178,8 +178,8 @@ ngram -lm $sdir/srilm.o3g.pr7.kn.gz -ppl $sdir/cleaned.heldout ...@@ -178,8 +178,8 @@ ngram -lm $sdir/srilm.o3g.pr7.kn.gz -ppl $sdir/cleaned.heldout
if [ -z $IRSTLM ] ; then if [ -z $IRSTLM ] ; then
export IRSTLM=$KALDI_ROOT/tools/irstlm/ export IRSTLM=$KALDI_ROOT/tools/irstlm/
fi fi
export PATH=${PATH}:$IRSTLM/bin
if [ ! -f $IRSTLM/bin/dict ] ; then if ! command -v prune-lm >/dev/null 2>&1 ; then
echo "$0: Error: the IRSTLM is not available or compiled" >&2 echo "$0: Error: the IRSTLM is not available or compiled" >&2
echo "$0: Error: We used to install it by default, but." >&2 echo "$0: Error: We used to install it by default, but." >&2
echo "$0: Error: this is no longer the case." >&2 echo "$0: Error: this is no longer the case." >&2
...@@ -193,12 +193,12 @@ mkdir $idir ...@@ -193,12 +193,12 @@ mkdir $idir
gunzip -c $srcdir/cleaned.gz | tail -n +$heldout_sent | $IRSTLM/scripts/add-start-end.sh | \ gunzip -c $srcdir/cleaned.gz | tail -n +$heldout_sent | $IRSTLM/scripts/add-start-end.sh | \
gzip -c > $idir/train.gz gzip -c > $idir/train.gz
$IRSTLM/bin/dict -i=WSJ.cleaned.irstlm.txt -o=dico -f=y -sort=no dict -i=WSJ.cleaned.irstlm.txt -o=dico -f=y -sort=no
cat dico | gawk 'BEGIN{while (getline<"vocab.20k.nooov") v[$1]=1; print "DICTIONARY 0 "length(v);}FNR>1{if ($1 in v)\ cat dico | gawk 'BEGIN{while (getline<"vocab.20k.nooov") v[$1]=1; print "DICTIONARY 0 "length(v);}FNR>1{if ($1 in v)\
{print $0;}}' > vocab.irstlm.20k {print $0;}}' > vocab.irstlm.20k
$IRSTLM/bin/build-lm.sh -i "gunzip -c $idir/train.gz" -o $idir/lm_3gram.gz -p yes \ build-lm.sh -i "gunzip -c $idir/train.gz" -o $idir/lm_3gram.gz -p yes \
-n 3 -s improved-kneser-ney -b yes -n 3 -s improved-kneser-ney -b yes
# Testing perplexity with SRILM tools: # Testing perplexity with SRILM tools:
ngram -lm $idir/lm_3gram.gz -ppl $sdir/cleaned.heldout ngram -lm $idir/lm_3gram.gz -ppl $sdir/cleaned.heldout
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment