Commit 0b52c1d5 authored by Jan Trmal
Browse files

Adding checks if IRSTLM is really installed, fail if it's not

parent db63ae29
......@@ -60,17 +60,26 @@ cat $dir/nonsilence_phones.txt | perl -e 'while(<>){ foreach $p (split(" ", $_))
>> $dir/extra_questions.txt || exit 1;
# (2) Create the phone bigram LM
[ -z "$IRSTLM" ] && \
echo "LM building won't work without setting the IRSTLM env variable" && exit 1;
! which build-lm.sh 2>/dev/null && \
echo "IRSTLM does not seem to be installed (build-lm.sh not on your path): " && \
echo "go to <kaldi-root>/tools and try 'make irstlm_tgt'" && exit 1;
cut -d' ' -f2- $srcdir/text | sed -e 's:^:<s> :' -e 's:$: </s>:' \
> $srcdir/lm_train
build-lm.sh -i $srcdir/lm_train -n 2 -o $tmpdir/lm_phone_bg.ilm.gz
compile-lm $tmpdir/lm_phone_bg.ilm.gz -t=yes /dev/stdout | \
# Fall back to the IRSTLM copy under the Kaldi tools directory when the caller
# has not set IRSTLM. The expansion is quoted so that the test always sees
# exactly one operand, even for an empty value or a path with whitespace.
if [ -z "$IRSTLM" ] ; then
  export IRSTLM=$KALDI_ROOT/tools/irstlm/
fi
# The presence of bin/dict is the check for a usable IRSTLM installation;
# without it, print installation instructions on stderr and abort.
if [ ! -f "$IRSTLM/bin/dict" ] ; then
  echo "$0: Error: the IRSTLM is not available or compiled" >&2
  echo "$0: Error: We used to install it by default, but." >&2
  echo "$0: Error: this is no longer the case." >&2
  echo "$0: Error: To install it, go to $KALDI_ROOT/tools" >&2
  echo "$0: Error: and run extras/install_irstlm.sh" >&2
  exit 1
fi
# Strip the first space-separated field from every line of $srcdir/text and
# wrap the remainder in <s> ... </s> markers; the result is the LM training
# text written to $srcdir/lm_train.
cut -d' ' -f2- $srcdir/text | sed -e 's:^:<s> :' -e 's:$: </s>:' \
> $srcdir/lm_train
# Build the phone bigram model (-n 2) using the tools under $IRSTLM/bin.
$IRSTLM/bin/build-lm.sh -i $srcdir/lm_train -n 2 \
-o $tmpdir/lm_phone_bg.ilm.gz
# Send the compiled model to /dev/stdout, drop every line containing "unk",
# and gzip what is left into $lmdir/lm_phone_bg.arpa.gz.
# NOTE(review): -t=yes presumably selects textual (ARPA) output -- confirm
# against the compile-lm usage message.
$IRSTLM/bin/compile-lm $tmpdir/lm_phone_bg.ilm.gz -t=yes /dev/stdout | \
grep -v unk | gzip -c > $lmdir/lm_phone_bg.arpa.gz
......
# Kaldi root is taken to be three directory levels above the current
# working directory.
export KALDI_ROOT=$(pwd)/../../..
# Source the optional tools/env.sh when it exists. The path is quoted so a
# checkout location containing whitespace does not break the test.
[ -f "$KALDI_ROOT/tools/env.sh" ] && . "$KALDI_ROOT/tools/env.sh"
# Put the recipe's utils/ directory and the Kaldi/OpenFst/IRSTLM binary
# directories ahead of the existing PATH.
export PATH=$PWD/utils/:$KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/tools/irstlm/bin/:$KALDI_ROOT/src/fstbin/:$KALDI_ROOT/src/gmmbin/:$KALDI_ROOT/src/featbin/:$KALDI_ROOT/src/lm/:$KALDI_ROOT/src/sgmmbin/:$KALDI_ROOT/src/sgmm2bin/:$KALDI_ROOT/src/fgmmbin/:$KALDI_ROOT/src/latbin/:$KALDI_ROOT/src/nnetbin:$KALDI_ROOT/src/nnet2bin/:$KALDI_ROOT/src/kwsbin:$PWD:$PATH
export LC_ALL=C
export IRSTLM=$KALDI_ROOT/tools/irstlm
......@@ -97,13 +97,30 @@ cut -f1 data/local/lexicon.txt \
# (4) Create the phone bigram LM
(
[ -z "$IRSTLM" ] && \
error_exit "LM building wo'nt work without setting the IRSTLM env variable"
cut -d' ' -f2- data/local/train.trans2 | sed -e 's:^:<s> :' -e 's:$: </s>:' \
> data/local/lm_train.txt
build-lm.sh -i data/local/lm_train.txt -n 2 -o data/local/lm_phone_bg.ilm.gz
compile-lm data/local/lm_phone_bg.ilm.gz --text yes /dev/stdout \
| grep -v unk | gzip -c > data/local/lm_phone_bg.arpa.gz
# Fall back to the IRSTLM copy under the Kaldi tools directory when the caller
# has not set IRSTLM. The expansion is quoted so that the test always sees
# exactly one operand, even for an empty value or a path with whitespace.
if [ -z "$IRSTLM" ] ; then
  export IRSTLM=$KALDI_ROOT/tools/irstlm/
fi
# The presence of bin/dict is the check for a usable IRSTLM installation;
# without it, print installation instructions on stderr and abort.
if [ ! -f "$IRSTLM/bin/dict" ] ; then
  echo "$0: Error: the IRSTLM is not available or compiled" >&2
  echo "$0: Error: We used to install it by default, but." >&2
  echo "$0: Error: this is no longer the case." >&2
  echo "$0: Error: To install it, go to $KALDI_ROOT/tools" >&2
  echo "$0: Error: and run extras/install_irstlm.sh" >&2
  exit 1
fi
# NOTE(review): this first cut reads $srcdir/text and writes $srcdir/lm_train,
# but none of the commands below read $srcdir/lm_train -- they all work on
# data/local/lm_train.txt. It looks like a leftover copied from a sibling
# script; confirm whether $srcdir is defined here and whether these two
# lines should be dropped -- TODO confirm.
cut -d' ' -f2- $srcdir/text | sed -e 's:^:<s> :' -e 's:$: </s>:' \
> $srcdir/lm_train
# Strip the first space-separated field from every transcript line and wrap
# the remainder in <s> ... </s> markers to form the LM training text.
cut -d' ' -f2- data/local/train.trans2 | sed -e 's:^:<s> :' -e 's:$: </s>:' \
> data/local/lm_train.txt
# Build the phone bigram model (-n 2) using the tools under $IRSTLM/bin.
$IRSTLM/bin/build-lm.sh -i data/local/lm_train.txt -n 2 \
-o data/local/lm_phone_bg.ilm.gz
# Send the compiled model to /dev/stdout, drop every line containing "unk",
# and gzip what is left into data/local/lm_phone_bg.arpa.gz.
# NOTE(review): "--text yes" presumably selects textual (ARPA) output --
# confirm against the compile-lm usage message.
$IRSTLM/bin/compile-lm data/local/lm_phone_bg.ilm.gz --text yes /dev/stdout \
| grep -v unk | gzip -c > data/local/lm_phone_bg.arpa.gz
) >& data/prepare_lm.log
......
......@@ -3,6 +3,7 @@
# The KALDIROOT environment variable must be set by the user.
# KALDIROOT=/absolute/path/to/kaldi/installation
# Source the optional tools/env.sh when it exists. It is looked up under
# $KALDIROOT, the variable the rest of this file is built on; $KALDI_ROOT
# (with an underscore) is not set anywhere in the visible part of this file,
# so testing it would silently skip env.sh.
[ -f "$KALDIROOT/tools/env.sh" ] && . "$KALDIROOT/tools/env.sh"
KALDISRC=$KALDIROOT/src
# Colon-separated list of Kaldi binary directories, assembled in two steps
# to keep the lines short.
KALDIBIN=$KALDISRC/bin:$KALDISRC/featbin:$KALDISRC/fgmmbin:$KALDISRC/fstbin
KALDIBIN=$KALDIBIN:$KALDISRC/gmmbin:$KALDISRC/latbin:$KALDISRC/nnetbin
......@@ -28,7 +29,6 @@ TOOLS=$SPH2PIPE
export PATH=$PATH:$KALDIBIN:$FSTBIN:$LMBIN:$SCRIPTS:$TOOLS
export LC_ALL=C
export IRSTLM=$KALDIROOT/tools/irstlm
## Site-specific configs for Edinburgh
# [ `hostname -y` == ecdf ] && \
......
......@@ -61,17 +61,26 @@ cat $dir/nonsilence_phones.txt | perl -e 'while(<>){ foreach $p (split(" ", $_))
>> $dir/extra_questions.txt || exit 1;
# (2) Create the phone bigram LM
[ -z "$IRSTLM" ] && \
echo "LM building won't work without setting the IRSTLM env variable" && exit 1;
! which build-lm.sh 2>/dev/null && \
echo "IRSTLM does not seem to be installed (build-lm.sh not on your path): " && \
echo "go to <kaldi-root>/tools and try 'make irstlm_tgt'" && exit 1;
cut -d' ' -f2- $srcdir/train.text | sed -e 's:^:<s> :' -e 's:$: </s>:' \
> $srcdir/lm_train.text
build-lm.sh -i $srcdir/lm_train.text -n 2 -o $tmpdir/lm_phone_bg.ilm.gz
compile-lm $tmpdir/lm_phone_bg.ilm.gz -t=yes /dev/stdout | \
grep -v unk | gzip -c > $lmdir/lm_phone_bg.arpa.gz
# Fall back to the IRSTLM copy under the Kaldi tools directory when the caller
# has not set IRSTLM. The expansion is quoted so that the test always sees
# exactly one operand, even for an empty value or a path with whitespace.
if [ -z "$IRSTLM" ] ; then
  export IRSTLM=$KALDI_ROOT/tools/irstlm/
fi
# The presence of bin/dict is the check for a usable IRSTLM installation;
# without it, print installation instructions on stderr and abort.
if [ ! -f "$IRSTLM/bin/dict" ] ; then
  echo "$0: Error: the IRSTLM is not available or compiled" >&2
  echo "$0: Error: We used to install it by default, but." >&2
  echo "$0: Error: this is no longer the case." >&2
  echo "$0: Error: To install it, go to $KALDI_ROOT/tools" >&2
  echo "$0: Error: and run extras/install_irstlm.sh" >&2
  exit 1
fi
# Strip the first space-separated field from every line of $srcdir/train.text
# and wrap the remainder in <s> ... </s> markers; the result is the LM
# training text written to $srcdir/lm_train.text.
cut -d' ' -f2- $srcdir/train.text | sed -e 's:^:<s> :' -e 's:$: </s>:' \
> $srcdir/lm_train.text
# Build the phone bigram model (-n 2) using the tools under $IRSTLM/bin.
$IRSTLM/bin/build-lm.sh -i $srcdir/lm_train.text -n 2 \
-o $tmpdir/lm_phone_bg.ilm.gz
# Send the compiled model to /dev/stdout, drop every line containing "unk",
# and gzip what is left into $lmdir/lm_phone_bg.arpa.gz.
# NOTE(review): -t=yes presumably selects textual (ARPA) output -- confirm
# against the compile-lm usage message.
$IRSTLM/bin/compile-lm $tmpdir/lm_phone_bg.ilm.gz -t=yes /dev/stdout | \
grep -v unk | gzip -c > $lmdir/lm_phone_bg.arpa.gz
echo "Dictionary & language model preparation succeeded"
# Kaldi root is taken to be three directory levels above the current
# working directory.
export KALDI_ROOT=$(pwd)/../../..
# Source the optional tools/env.sh when it exists. The path is quoted so a
# checkout location containing whitespace does not break the test.
[ -f "$KALDI_ROOT/tools/env.sh" ] && . "$KALDI_ROOT/tools/env.sh"
# Put the recipe's utils/ directory and the Kaldi/OpenFst/IRSTLM binary
# directories ahead of the existing PATH.
export PATH=$PWD/utils/:$KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/tools/irstlm/bin/:$KALDI_ROOT/src/fstbin/:$KALDI_ROOT/src/gmmbin/:$KALDI_ROOT/src/featbin/:$KALDI_ROOT/src/lm/:$KALDI_ROOT/src/sgmmbin/:$KALDI_ROOT/src/sgmm2bin/:$KALDI_ROOT/src/fgmmbin/:$KALDI_ROOT/src/latbin/:$KALDI_ROOT/src/nnetbin:$KALDI_ROOT/src/nnet2bin/:$KALDI_ROOT/src/kwsbin:$PWD:$PATH
export LC_ALL=C
export IRSTLM=$KALDI_ROOT/tools/irstlm
......@@ -174,7 +174,19 @@ ngram -lm $sdir/srilm.o3g.pr7.kn.gz -ppl $sdir/cleaned.heldout
## From here is how to train with
# IRSTLM. This is not really working at the moment.
export IRSTLM=$KALDI_ROOT/tools/irstlm/
# Fall back to the IRSTLM copy under the Kaldi tools directory when the caller
# has not set IRSTLM. The expansion is quoted so that the test always sees
# exactly one operand, even for an empty value or a path with whitespace.
if [ -z "$IRSTLM" ] ; then
  export IRSTLM=$KALDI_ROOT/tools/irstlm/
fi
# The presence of bin/dict is the check for a usable IRSTLM installation;
# without it, print installation instructions on stderr and abort.
if [ ! -f "$IRSTLM/bin/dict" ] ; then
  echo "$0: Error: the IRSTLM is not available or compiled" >&2
  echo "$0: Error: We used to install it by default, but." >&2
  echo "$0: Error: this is no longer the case." >&2
  echo "$0: Error: To install it, go to $KALDI_ROOT/tools" >&2
  echo "$0: Error: and run extras/install_irstlm.sh" >&2
  exit 1
fi
idir=$dir/irstlm
mkdir $idir
......
# Kaldi root is taken to be three directory levels above the current
# working directory.
export KALDI_ROOT=$(pwd)/../../..
# Source the optional tools/env.sh when it exists. The path is quoted so a
# checkout location containing whitespace does not break the test.
[ -f "$KALDI_ROOT/tools/env.sh" ] && . "$KALDI_ROOT/tools/env.sh"
# Put the recipe's utils/ directory and the Kaldi/OpenFst binary directories
# ahead of the existing PATH.
export PATH=$PWD/utils/:$KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/src/fstbin/:$KALDI_ROOT/src/gmmbin/:$KALDI_ROOT/src/featbin/:$KALDI_ROOT/src/lm/:$KALDI_ROOT/src/sgmmbin/:$KALDI_ROOT/src/sgmm2bin/:$KALDI_ROOT/src/fgmmbin/:$KALDI_ROOT/src/latbin/:$KALDI_ROOT/src/nnetbin:$KALDI_ROOT/src/nnet2bin/:$KALDI_ROOT/src/kwsbin:$KALDI_ROOT/src/online2bin/:$KALDI_ROOT/src/ivectorbin/:$KALDI_ROOT/src/lmbin/:$PWD:$PATH
export LC_ALL=C
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment