Commit 827a5d62 authored by Daniel Povey
Browse files

Merge pull request #12 from jtrmal/irstlm-checks

Irstlm checks
parents 514e1ac1 55c7917e
export KALDI_ROOT=`pwd`/../../.. export KALDI_ROOT=`pwd`/../../..
# Source the optional tools/env.sh when it exists. The paths are quoted so a
# KALDI_ROOT containing spaces or glob characters is not word-split.
[ -f "$KALDI_ROOT/tools/env.sh" ] && . "$KALDI_ROOT/tools/env.sh"
export PATH=$PWD/utils/:$KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/src/fstbin/:$KALDI_ROOT/src/gmmbin/:$KALDI_ROOT/src/featbin/:$KALDI_ROOT/src/lm/:$KALDI_ROOT/src/sgmmbin/:$KALDI_ROOT/src/sgmm2bin/:$KALDI_ROOT/src/fgmmbin/:$KALDI_ROOT/src/latbin/:$KALDI_ROOT/src/nnetbin:$KALDI_ROOT/src/nnet2bin/:$KALDI_ROOT/src/kwsbin:$KALDI_ROOT/src/online2bin/:$KALDI_ROOT/src/ivectorbin/:$KALDI_ROOT/src/lmbin/:$PWD:$PATH export PATH=$PWD/utils/:$KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/src/fstbin/:$KALDI_ROOT/src/gmmbin/:$KALDI_ROOT/src/featbin/:$KALDI_ROOT/src/lm/:$KALDI_ROOT/src/sgmmbin/:$KALDI_ROOT/src/sgmm2bin/:$KALDI_ROOT/src/fgmmbin/:$KALDI_ROOT/src/latbin/:$KALDI_ROOT/src/nnetbin:$KALDI_ROOT/src/nnet2bin/:$KALDI_ROOT/src/kwsbin:$KALDI_ROOT/src/online2bin/:$KALDI_ROOT/src/ivectorbin/:$KALDI_ROOT/src/lmbin/:$PWD:$PATH
export LC_ALL=C export LC_ALL=C
......
...@@ -27,6 +27,19 @@ esac ...@@ -27,6 +27,19 @@ esac
# Load previous / store the new AMI_DIR location, # Load previous / store the new AMI_DIR location,
[ -r conf/ami_dir ] && AMI_DIR=$(cat conf/ami_dir) || echo $AMI_DIR >conf/ami_dir [ -r conf/ami_dir ] && AMI_DIR=$(cat conf/ami_dir) || echo $AMI_DIR >conf/ami_dir
# Locate IRSTLM: keep a caller-supplied $IRSTLM, otherwise fall back to the
# copy under $KALDI_ROOT/tools. "${IRSTLM:-}" keeps the test well-formed when
# the variable is unset (also under 'set -u') or contains whitespace.
if [ -z "${IRSTLM:-}" ] ; then
  export IRSTLM="$KALDI_ROOT/tools/irstlm/"
fi
# Appended (not prepended), so entries already on PATH take precedence.
export PATH="${PATH}:$IRSTLM/bin"
# prune-lm serves as the probe for a usable IRSTLM installation.
if ! command -v prune-lm >/dev/null 2>&1 ; then
  echo "$0: Error: the IRSTLM is not available or compiled" >&2
  echo "$0: Error: We used to install it by default," >&2
  echo "$0: Error: but this is no longer the case." >&2
  echo "$0: Error: To install it, go to $KALDI_ROOT/tools" >&2
  echo "$0: Error: and run extras/install_irstlm.sh" >&2
  exit 1
fi
# Set bash to 'debug' mode, it will exit on : # Set bash to 'debug' mode, it will exit on :
# -e 'error', -u 'undefined variable', -o ... 'error in pipeline', -x 'print commands', # -e 'error', -u 'undefined variable', -o ... 'error in pipeline', -x 'print commands',
set -e set -e
......
...@@ -22,13 +22,25 @@ local=`pwd`/local ...@@ -22,13 +22,25 @@ local=`pwd`/local
utils=`pwd`/utils utils=`pwd`/utils
. ./path.sh # Needed for KALDI_ROOT . ./path.sh # Needed for KALDI_ROOT
export PATH=$PATH:$KALDI_ROOT/tools/irstlm/bin
sph2pipe=$KALDI_ROOT/tools/sph2pipe_v2.5/sph2pipe sph2pipe=$KALDI_ROOT/tools/sph2pipe_v2.5/sph2pipe
if [ ! -x $sph2pipe ]; then if [ ! -x $sph2pipe ]; then
echo "Could not find (or execute) the sph2pipe program at $sph2pipe"; echo "Could not find (or execute) the sph2pipe program at $sph2pipe";
exit 1; exit 1;
fi fi
# Locate IRSTLM: keep a caller-supplied $IRSTLM, otherwise fall back to the
# copy under $KALDI_ROOT/tools. "${IRSTLM:-}" keeps the test well-formed when
# the variable is unset (also under 'set -u') or contains whitespace.
if [ -z "${IRSTLM:-}" ] ; then
  export IRSTLM="$KALDI_ROOT/tools/irstlm/"
fi
# Appended (not prepended), so entries already on PATH take precedence.
export PATH="${PATH}:$IRSTLM/bin"
# prune-lm serves as the probe for a usable IRSTLM installation.
if ! command -v prune-lm >/dev/null 2>&1 ; then
  echo "$0: Error: the IRSTLM is not available or compiled" >&2
  echo "$0: Error: We used to install it by default," >&2
  echo "$0: Error: but this is no longer the case." >&2
  echo "$0: Error: To install it, go to $KALDI_ROOT/tools" >&2
  echo "$0: Error: and run extras/install_irstlm.sh" >&2
  exit 1
fi
cd $dir cd $dir
# SI-84 clean training data # SI-84 clean training data
......
...@@ -25,13 +25,25 @@ local=`pwd`/local ...@@ -25,13 +25,25 @@ local=`pwd`/local
utils=`pwd`/utils utils=`pwd`/utils
. ./path.sh # Needed for KALDI_ROOT . ./path.sh # Needed for KALDI_ROOT
export PATH=$PATH:$KALDI_ROOT/tools/irstlm/bin
sph2pipe=$KALDI_ROOT/tools/sph2pipe_v2.5/sph2pipe sph2pipe=$KALDI_ROOT/tools/sph2pipe_v2.5/sph2pipe
if [ ! -x $sph2pipe ]; then if [ ! -x $sph2pipe ]; then
echo "Could not find (or execute) the sph2pipe program at $sph2pipe"; echo "Could not find (or execute) the sph2pipe program at $sph2pipe";
exit 1; exit 1;
fi fi
# Locate IRSTLM: keep a caller-supplied $IRSTLM, otherwise fall back to the
# copy under $KALDI_ROOT/tools. "${IRSTLM:-}" keeps the test well-formed when
# the variable is unset (also under 'set -u') or contains whitespace.
if [ -z "${IRSTLM:-}" ] ; then
  export IRSTLM="$KALDI_ROOT/tools/irstlm/"
fi
# Appended (not prepended), so entries already on PATH take precedence.
export PATH="${PATH}:$IRSTLM/bin"
# prune-lm serves as the probe for a usable IRSTLM installation.
if ! command -v prune-lm >/dev/null 2>&1 ; then
  echo "$0: Error: the IRSTLM is not available or compiled" >&2
  echo "$0: Error: We used to install it by default," >&2
  echo "$0: Error: but this is no longer the case." >&2
  echo "$0: Error: To install it, go to $KALDI_ROOT/tools" >&2
  echo "$0: Error: and run extras/install_irstlm.sh" >&2
  exit 1
fi
cd $dir cd $dir
# This version for SI-84 # This version for SI-84
......
export KALDI_ROOT=`pwd`/../../.. export KALDI_ROOT=`pwd`/../../..
# Source the optional tools/env.sh when it exists. The paths are quoted so a
# KALDI_ROOT containing spaces or glob characters is not word-split.
[ -f "$KALDI_ROOT/tools/env.sh" ] && . "$KALDI_ROOT/tools/env.sh"
export PATH=$PWD/utils/:$KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/src/fstbin/:$KALDI_ROOT/src/gmmbin/:$KALDI_ROOT/src/featbin/:$KALDI_ROOT/src/lm/:$KALDI_ROOT/src/sgmmbin/:$KALDI_ROOT/src/sgmm2bin/:$KALDI_ROOT/src/fgmmbin/:$KALDI_ROOT/src/latbin/:$KALDI_ROOT/src/nnetbin:$KALDI_ROOT/src/nnet2bin/:$KALDI_ROOT/src/kwsbin:$PWD:$PATH export PATH=$PWD/utils/:$KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/src/fstbin/:$KALDI_ROOT/src/gmmbin/:$KALDI_ROOT/src/featbin/:$KALDI_ROOT/src/lm/:$KALDI_ROOT/src/sgmmbin/:$KALDI_ROOT/src/sgmm2bin/:$KALDI_ROOT/src/fgmmbin/:$KALDI_ROOT/src/latbin/:$KALDI_ROOT/src/nnetbin:$KALDI_ROOT/src/nnet2bin/:$KALDI_ROOT/src/kwsbin:$PWD:$PATH
export LC_ALL=C export LC_ALL=C
...@@ -25,13 +25,25 @@ local=`pwd`/local ...@@ -25,13 +25,25 @@ local=`pwd`/local
utils=`pwd`/utils utils=`pwd`/utils
. ./path.sh # Needed for KALDI_ROOT . ./path.sh # Needed for KALDI_ROOT
export PATH=$PATH:$KALDI_ROOT/tools/irstlm/bin
sph2pipe=$KALDI_ROOT/tools/sph2pipe_v2.5/sph2pipe sph2pipe=$KALDI_ROOT/tools/sph2pipe_v2.5/sph2pipe
if [ ! -x $sph2pipe ]; then if [ ! -x $sph2pipe ]; then
echo "Could not find (or execute) the sph2pipe program at $sph2pipe"; echo "Could not find (or execute) the sph2pipe program at $sph2pipe";
exit 1; exit 1;
fi fi
# Locate IRSTLM: keep a caller-supplied $IRSTLM, otherwise fall back to the
# copy under $KALDI_ROOT/tools. "${IRSTLM:-}" keeps the test well-formed when
# the variable is unset (also under 'set -u') or contains whitespace.
if [ -z "${IRSTLM:-}" ] ; then
  export IRSTLM="$KALDI_ROOT/tools/irstlm/"
fi
# Appended (not prepended), so entries already on PATH take precedence.
export PATH="${PATH}:$IRSTLM/bin"
# prune-lm serves as the probe for a usable IRSTLM installation.
if ! command -v prune-lm >/dev/null 2>&1 ; then
  echo "$0: Error: the IRSTLM is not available or compiled" >&2
  echo "$0: Error: We used to install it by default," >&2
  echo "$0: Error: but this is no longer the case." >&2
  echo "$0: Error: To install it, go to $KALDI_ROOT/tools" >&2
  echo "$0: Error: and run extras/install_irstlm.sh" >&2
  exit 1
fi
cd $dir cd $dir
# This version for SI-84 # This version for SI-84
......
export KALDI_ROOT=`pwd`/../../.. export KALDI_ROOT=`pwd`/../../..
# Source the optional tools/env.sh when it exists. The paths are quoted so a
# KALDI_ROOT containing spaces or glob characters is not word-split.
[ -f "$KALDI_ROOT/tools/env.sh" ] && . "$KALDI_ROOT/tools/env.sh"
. /export/babel/data/software/env.sh . /export/babel/data/software/env.sh
export PATH=$PWD/utils/:$KALDI_ROOT/tools/sph2pipe_v2.5/:$KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/src/fstbin/:$KALDI_ROOT/src/gmmbin/:$KALDI_ROOT/src/featbin/:$KALDI_ROOT/src/lm/:$KALDI_ROOT/src/sgmmbin/:$KALDI_ROOT/src/sgmm2bin/:$KALDI_ROOT/src/fgmmbin/:$KALDI_ROOT/src/latbin/:$KALDI_ROOT/src/nnetbin:$KALDI_ROOT/src/nnet2bin/:$KALDI_ROOT/src/kwsbin:$PWD:$PATH export PATH=$PWD/utils/:$KALDI_ROOT/tools/sph2pipe_v2.5/:$KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/src/fstbin/:$KALDI_ROOT/src/gmmbin/:$KALDI_ROOT/src/featbin/:$KALDI_ROOT/src/lm/:$KALDI_ROOT/src/sgmmbin/:$KALDI_ROOT/src/sgmm2bin/:$KALDI_ROOT/src/fgmmbin/:$KALDI_ROOT/src/latbin/:$KALDI_ROOT/src/nnetbin:$KALDI_ROOT/src/nnet2bin/:$KALDI_ROOT/src/kwsbin:$PWD:$PATH
export LC_ALL=C export LC_ALL=C
......
...@@ -28,13 +28,25 @@ local=`pwd`/local ...@@ -28,13 +28,25 @@ local=`pwd`/local
utils=`pwd`/utils utils=`pwd`/utils
. ./path.sh # Needed for KALDI_ROOT . ./path.sh # Needed for KALDI_ROOT
export PATH=$PATH:$KALDI_ROOT/tools/irstlm/bin
sph2pipe=$KALDI_ROOT/tools/sph2pipe_v2.5/sph2pipe sph2pipe=$KALDI_ROOT/tools/sph2pipe_v2.5/sph2pipe
if [ ! -x $sph2pipe ]; then if [ ! -x $sph2pipe ]; then
echo "Could not find (or execute) the sph2pipe program at $sph2pipe"; echo "Could not find (or execute) the sph2pipe program at $sph2pipe";
exit 1; exit 1;
fi fi
# Locate IRSTLM: keep a caller-supplied $IRSTLM, otherwise fall back to the
# copy under $KALDI_ROOT/tools. "${IRSTLM:-}" keeps the test well-formed when
# the variable is unset (also under 'set -u') or contains whitespace.
if [ -z "${IRSTLM:-}" ] ; then
  export IRSTLM="$KALDI_ROOT/tools/irstlm/"
fi
# Appended (not prepended), so entries already on PATH take precedence.
export PATH="${PATH}:$IRSTLM/bin"
# prune-lm serves as the probe for a usable IRSTLM installation.
if ! command -v prune-lm >/dev/null 2>&1 ; then
  echo "$0: Error: the IRSTLM is not available or compiled" >&2
  echo "$0: Error: We used to install it by default," >&2
  echo "$0: Error: but this is no longer the case." >&2
  echo "$0: Error: To install it, go to $KALDI_ROOT/tools" >&2
  echo "$0: Error: and run extras/install_irstlm.sh" >&2
  exit 1
fi
cd $dir cd $dir
# This version for SI-84 # This version for SI-84
......
export KALDI_ROOT=`pwd`/../../.. export KALDI_ROOT=`pwd`/../../..
# Source the optional tools/env.sh when it exists. The paths are quoted so a
# KALDI_ROOT containing spaces or glob characters is not word-split.
[ -f "$KALDI_ROOT/tools/env.sh" ] && . "$KALDI_ROOT/tools/env.sh"
export PATH=$PWD/utils/:$KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/src/fstbin/:$KALDI_ROOT/src/gmmbin/:$KALDI_ROOT/src/featbin/:$KALDI_ROOT/src/lm/:$KALDI_ROOT/src/sgmmbin/:$KALDI_ROOT/src/sgmm2bin/:$KALDI_ROOT/src/fgmmbin/:$KALDI_ROOT/src/latbin/:$KALDI_ROOT/src/nnetbin:$KALDI_ROOT/src/nnet2bin/:$KALDI_ROOT/src/kwsbin:$KALDI_ROOT/tools/irstlm/bin/:$KALDI_ROOT/tools/kaldi_lm/:$PWD:$PATH export PATH=$PWD/utils/:$KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/src/fstbin/:$KALDI_ROOT/src/gmmbin/:$KALDI_ROOT/src/featbin/:$KALDI_ROOT/src/lm/:$KALDI_ROOT/src/sgmmbin/:$KALDI_ROOT/src/sgmm2bin/:$KALDI_ROOT/src/fgmmbin/:$KALDI_ROOT/src/latbin/:$KALDI_ROOT/src/nnetbin:$KALDI_ROOT/src/nnet2bin/:$KALDI_ROOT/src/kwsbin:$KALDI_ROOT/tools/irstlm/bin/:$KALDI_ROOT/tools/kaldi_lm/:$PWD:$PATH
export LC_ALL=C export LC_ALL=C
...@@ -25,13 +25,25 @@ local=`pwd`/local ...@@ -25,13 +25,25 @@ local=`pwd`/local
utils=`pwd`/utils utils=`pwd`/utils
. ./path.sh # Needed for KALDI_ROOT . ./path.sh # Needed for KALDI_ROOT
export PATH=$PATH:$KALDI_ROOT/tools/irstlm/bin
sph2pipe=$KALDI_ROOT/tools/sph2pipe_v2.5/sph2pipe sph2pipe=$KALDI_ROOT/tools/sph2pipe_v2.5/sph2pipe
if [ ! -x $sph2pipe ]; then if [ ! -x $sph2pipe ]; then
echo "Could not find (or execute) the sph2pipe program at $sph2pipe"; echo "Could not find (or execute) the sph2pipe program at $sph2pipe";
exit 1; exit 1;
fi fi
# Locate IRSTLM: keep a caller-supplied $IRSTLM, otherwise fall back to the
# copy under $KALDI_ROOT/tools. "${IRSTLM:-}" keeps the test well-formed when
# the variable is unset (also under 'set -u') or contains whitespace.
if [ -z "${IRSTLM:-}" ] ; then
  export IRSTLM="$KALDI_ROOT/tools/irstlm/"
fi
# Appended (not prepended), so entries already on PATH take precedence.
export PATH="${PATH}:$IRSTLM/bin"
# prune-lm serves as the probe for a usable IRSTLM installation.
if ! command -v prune-lm >/dev/null 2>&1 ; then
  echo "$0: Error: the IRSTLM is not available or compiled" >&2
  echo "$0: Error: We used to install it by default," >&2
  echo "$0: Error: but this is no longer the case." >&2
  echo "$0: Error: To install it, go to $KALDI_ROOT/tools" >&2
  echo "$0: Error: and run extras/install_irstlm.sh" >&2
  exit 1
fi
cd $dir cd $dir
# This version for SI-84 # This version for SI-84
......
export KALDI_ROOT=`pwd`/../../.. export KALDI_ROOT=`pwd`/../../..
# Source the optional tools/env.sh when it exists. The paths are quoted so a
# KALDI_ROOT containing spaces or glob characters is not word-split.
[ -f "$KALDI_ROOT/tools/env.sh" ] && . "$KALDI_ROOT/tools/env.sh"
export PATH=$PWD/utils/:$KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/src/fstbin/:$KALDI_ROOT/src/gmmbin/:$KALDI_ROOT/src/featbin/:$KALDI_ROOT/src/lm/:$KALDI_ROOT/src/sgmmbin/:$KALDI_ROOT/src/sgmm2bin/:$KALDI_ROOT/src/fgmmbin/:$KALDI_ROOT/src/latbin/:$KALDI_ROOT/src/nnetbin:$KALDI_ROOT/src/nnet2bin/:$KALDI_ROOT/src/kwsbin:$PWD:$PATH export PATH=$PWD/utils/:$KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/src/fstbin/:$KALDI_ROOT/src/gmmbin/:$KALDI_ROOT/src/featbin/:$KALDI_ROOT/src/lm/:$KALDI_ROOT/src/sgmmbin/:$KALDI_ROOT/src/sgmm2bin/:$KALDI_ROOT/src/fgmmbin/:$KALDI_ROOT/src/latbin/:$KALDI_ROOT/src/nnetbin:$KALDI_ROOT/src/nnet2bin/:$KALDI_ROOT/src/kwsbin:$PWD:$PATH
export LC_ALL=C export LC_ALL=C
...@@ -60,17 +60,26 @@ cat $dir/nonsilence_phones.txt | perl -e 'while(<>){ foreach $p (split(" ", $_)) ...@@ -60,17 +60,26 @@ cat $dir/nonsilence_phones.txt | perl -e 'while(<>){ foreach $p (split(" ", $_))
>> $dir/extra_questions.txt || exit 1; >> $dir/extra_questions.txt || exit 1;
# (2) Create the phone bigram LM # (2) Create the phone bigram LM
[ -z "$IRSTLM" ] && \ if [ -z $IRSTLM ] ; then
echo "LM building won't work without setting the IRSTLM env variable" && exit 1; export IRSTLM=$KALDI_ROOT/tools/irstlm/
! which build-lm.sh 2>/dev/null && \ fi
echo "IRSTLM does not seem to be installed (build-lm.sh not on your path): " && \ export PATH=${PATH}:$IRSTLM/bin
echo "go to <kaldi-root>/tools and try 'make irstlm_tgt'" && exit 1; if ! command -v prune-lm >/dev/null 2>&1 ; then
echo "$0: Error: the IRSTLM is not available or compiled" >&2
cut -d' ' -f2- $srcdir/text | sed -e 's:^:<s> :' -e 's:$: </s>:' \ echo "$0: Error: We used to install it by default, but." >&2
> $srcdir/lm_train echo "$0: Error: this is no longer the case." >&2
build-lm.sh -i $srcdir/lm_train -n 2 -o $tmpdir/lm_phone_bg.ilm.gz echo "$0: Error: To install it, go to $KALDI_ROOT/tools" >&2
echo "$0: Error: and run extras/install_irstlm.sh" >&2
compile-lm $tmpdir/lm_phone_bg.ilm.gz -t=yes /dev/stdout | \ exit 1
fi
cut -d' ' -f2- $srcdir/text | sed -e 's:^:<s> :' -e 's:$: </s>:' \
> $srcdir/lm_train
build-lm.sh -i $srcdir/lm_train -n 2 \
-o $tmpdir/lm_phone_bg.ilm.gz
compile-lm $tmpdir/lm_phone_bg.ilm.gz -t=yes /dev/stdout | \
grep -v unk | gzip -c > $lmdir/lm_phone_bg.arpa.gz grep -v unk | gzip -c > $lmdir/lm_phone_bg.arpa.gz
......
export KALDI_ROOT=`pwd`/../../.. export KALDI_ROOT=`pwd`/../../..
# Source the optional tools/env.sh when it exists. The paths are quoted so a
# KALDI_ROOT containing spaces or glob characters is not word-split.
[ -f "$KALDI_ROOT/tools/env.sh" ] && . "$KALDI_ROOT/tools/env.sh"
export PATH=$PWD/utils/:$KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/tools/irstlm/bin/:$KALDI_ROOT/src/fstbin/:$KALDI_ROOT/src/gmmbin/:$KALDI_ROOT/src/featbin/:$KALDI_ROOT/src/lm/:$KALDI_ROOT/src/sgmmbin/:$KALDI_ROOT/src/sgmm2bin/:$KALDI_ROOT/src/fgmmbin/:$KALDI_ROOT/src/latbin/:$KALDI_ROOT/src/nnetbin:$KALDI_ROOT/src/nnet2bin/:$KALDI_ROOT/src/kwsbin:$PWD:$PATH export PATH=$PWD/utils/:$KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/tools/irstlm/bin/:$KALDI_ROOT/src/fstbin/:$KALDI_ROOT/src/gmmbin/:$KALDI_ROOT/src/featbin/:$KALDI_ROOT/src/lm/:$KALDI_ROOT/src/sgmmbin/:$KALDI_ROOT/src/sgmm2bin/:$KALDI_ROOT/src/fgmmbin/:$KALDI_ROOT/src/latbin/:$KALDI_ROOT/src/nnetbin:$KALDI_ROOT/src/nnet2bin/:$KALDI_ROOT/src/kwsbin:$PWD:$PATH
export LC_ALL=C export LC_ALL=C
export IRSTLM=$KALDI_ROOT/tools/irstlm
...@@ -52,6 +52,19 @@ do ...@@ -52,6 +52,19 @@ do
esac esac
done done
# Locate IRSTLM: keep a caller-supplied $IRSTLM, otherwise fall back to the
# copy under $KALDI_ROOT/tools. "${IRSTLM:-}" keeps the test well-formed when
# the variable is unset (also under 'set -u') or contains whitespace.
if [ -z "${IRSTLM:-}" ] ; then
  export IRSTLM="$KALDI_ROOT/tools/irstlm/"
fi
# Appended (not prepended), so entries already on PATH take precedence.
export PATH="${PATH}:$IRSTLM/bin"
# prune-lm serves as the probe for a usable IRSTLM installation.
if ! command -v prune-lm >/dev/null 2>&1 ; then
  echo "$0: Error: the IRSTLM is not available or compiled" >&2
  echo "$0: Error: We used to install it by default," >&2
  echo "$0: Error: but this is no longer the case." >&2
  echo "$0: Error: To install it, go to $KALDI_ROOT/tools" >&2
  echo "$0: Error: and run extras/install_irstlm.sh" >&2
  exit 1
fi
cd $WDIR; cd $WDIR;
tmpdir=$(mktemp -d); tmpdir=$(mktemp -d);
trap 'rm -rf "$tmpdir"' EXIT trap 'rm -rf "$tmpdir"' EXIT
......
...@@ -2,6 +2,8 @@ ...@@ -2,6 +2,8 @@
# the GlobalPhone experiments. # the GlobalPhone experiments.
KALDIROOT=/exports/home/aghoshal/kaldi/trunk KALDIROOT=/exports/home/aghoshal/kaldi/trunk
# Source the optional tools/env.sh when it exists. This file names its root
# variable KALDIROOT (no underscore), so that is the variable to test here;
# $KALDI_ROOT is not assigned in the visible part of this file.
[ -f "$KALDIROOT/tools/env.sh" ] && . "$KALDIROOT/tools/env.sh"
KALDISRC=$KALDIROOT/src KALDISRC=$KALDIROOT/src
KALDIBIN=$KALDISRC/bin:$KALDISRC/featbin:$KALDISRC/fgmmbin:$KALDISRC/fstbin KALDIBIN=$KALDISRC/bin:$KALDISRC/featbin:$KALDISRC/fgmmbin:$KALDISRC/fstbin
KALDIBIN=$KALDIBIN:$KALDISRC/gmmbin:$KALDISRC/latbin:$KALDISRC/nnetbin KALDIBIN=$KALDIBIN:$KALDISRC/gmmbin:$KALDISRC/latbin:$KALDISRC/nnetbin
......
...@@ -49,6 +49,19 @@ while [ $# -gt 0 ]; do ...@@ -49,6 +49,19 @@ while [ $# -gt 0 ]; do
esac esac
done done
# Locate IRSTLM: keep a caller-supplied $IRSTLM, otherwise fall back to the
# copy under $KALDI_ROOT/tools. "${IRSTLM:-}" keeps the test well-formed when
# the variable is unset (also under 'set -u') or contains whitespace.
# NOTE(review): assumes KALDI_ROOT is set by the environment or path.sh - confirm.
if [ -z "${IRSTLM:-}" ] ; then
  export IRSTLM="$KALDI_ROOT/tools/irstlm/"
fi
# Appended (not prepended), so entries already on PATH take precedence.
export PATH="${PATH}:$IRSTLM/bin"
# prune-lm serves as the probe for a usable IRSTLM installation.
if ! command -v prune-lm >/dev/null 2>&1 ; then
  echo "$0: Error: the IRSTLM is not available or compiled" >&2
  echo "$0: Error: We used to install it by default," >&2
  echo "$0: Error: but this is no longer the case." >&2
  echo "$0: Error: To install it, go to $KALDI_ROOT/tools" >&2
  echo "$0: Error: and run extras/install_irstlm.sh" >&2
  exit 1
fi
for L in $LANGUAGES; do for L in $LANGUAGES; do
lm=$LMDIR/${L}.3gram.lm.gz lm=$LMDIR/${L}.3gram.lm.gz
[ -f $lm ] || { echo "LM '$lm' not found"; exit 1; } [ -f $lm ] || { echo "LM '$lm' not found"; exit 1; }
......
...@@ -4,6 +4,8 @@ ...@@ -4,6 +4,8 @@
export LC_ALL=C # For expected sorting and joining behaviour export LC_ALL=C # For expected sorting and joining behaviour
KALDI_ROOT=/homes/eva/q/qghoshal/src/kaldi/trunk KALDI_ROOT=/homes/eva/q/qghoshal/src/kaldi/trunk
# Source the optional tools/env.sh when it exists. The paths are quoted so a
# KALDI_ROOT containing spaces or glob characters is not word-split.
[ -f "$KALDI_ROOT/tools/env.sh" ] && . "$KALDI_ROOT/tools/env.sh"
KALDISRC=$KALDI_ROOT/src KALDISRC=$KALDI_ROOT/src
KALDIBIN=$KALDISRC/bin:$KALDISRC/featbin:$KALDISRC/fgmmbin:$KALDISRC/fstbin KALDIBIN=$KALDISRC/bin:$KALDISRC/featbin:$KALDISRC/fgmmbin:$KALDISRC/fstbin
KALDIBIN=$KALDIBIN:$KALDISRC/gmmbin:$KALDISRC/latbin:$KALDISRC/nnetbin KALDIBIN=$KALDIBIN:$KALDISRC/gmmbin:$KALDISRC/latbin:$KALDISRC/nnetbin
......
...@@ -9,7 +9,18 @@ ...@@ -9,7 +9,18 @@
# data/train_si284, data/train_si84, etc. # data/train_si284, data/train_si84, etc.
. ./path.sh || exit 1; . ./path.sh || exit 1;
export PATH=$KALDI_ROOT/tools/irstlm/bin:$PATH if [ -z $IRSTLM ] ; then
export IRSTLM=$KALDI_ROOT/tools/irstlm/
fi
export PATH=${PATH}:$IRSTLM/bin
if ! command -v ngt >/dev/null 2>&1 ; then
echo "$0: Error: the IRSTLM is not available or compiled" >&2
echo "$0: Error: We used to install it by default, but." >&2
echo "$0: Error: this is no longer the case." >&2
echo "$0: Error: To install it, go to $KALDI_ROOT/tools" >&2
echo "$0: Error: and run extras/install_irstlm.sh" >&2
exit 1
fi
srcdict=$1 srcdict=$1
newtext=$2 newtext=$2
...@@ -119,7 +130,7 @@ if [ ! -f $lmdir/extra4.ngt ]; ...@@ -119,7 +130,7 @@ if [ ! -f $lmdir/extra4.ngt ];
awk '{if(NF>=4){ printf("%s\n",$0); }}' > $lmdir/text.filt awk '{if(NF>=4){ printf("%s\n",$0); }}' > $lmdir/text.filt
# Envelop LM training data in context cues # Envelop LM training data in context cues
$irstbin/add-start-end.sh < $lmdir/text.filt > $lmdir/lm_input add-start-end.sh < $lmdir/text.filt > $lmdir/lm_input
echo "Creating new binary ngram table $lmdir/extra4.ngt" echo "Creating new binary ngram table $lmdir/extra4.ngt"
......
...@@ -14,6 +14,19 @@ ...@@ -14,6 +14,19 @@
. ./path.sh || exit 1; . ./path.sh || exit 1;
# Locate IRSTLM: keep a caller-supplied $IRSTLM, otherwise fall back to the
# copy under $KALDI_ROOT/tools. "${IRSTLM:-}" keeps the test well-formed when
# the variable is unset (also under 'set -u') or contains whitespace.
if [ -z "${IRSTLM:-}" ] ; then
  export IRSTLM="$KALDI_ROOT/tools/irstlm/"
fi
# Appended (not prepended), so entries already on PATH take precedence.
export PATH="${PATH}:$IRSTLM/bin"
# ngt serves as the probe for a usable IRSTLM installation.
if ! command -v ngt >/dev/null 2>&1 ; then
  echo "$0: Error: the IRSTLM is not available or compiled" >&2
  echo "$0: Error: We used to install it by default," >&2
  echo "$0: Error: but this is no longer the case." >&2
  echo "$0: Error: To install it, go to $KALDI_ROOT/tools" >&2
  echo "$0: Error: and run extras/install_irstlm.sh" >&2
  exit 1
fi
echo "Preparing train and test data" echo "Preparing train and test data"
srcdir=$4 srcdir=$4
lmdir=$5 lmdir=$5
...@@ -25,13 +38,12 @@ lm_suffix=$3 ...@@ -25,13 +38,12 @@ lm_suffix=$3
mkdir -p $lmdir mkdir -p $lmdir
mkdir -p $tmpdir mkdir -p $tmpdir
irstbin=$KALDI_ROOT/tools/irstlm/bin
#grep -P -v '^[\s?|\.|\!]*$' $lexicon | grep -v '^ *$' | \ #grep -P -v '^[\s?|\.|\!]*$' $lexicon | grep -v '^ *$' | \
#awk '{if(NF>=4){ printf("%s\n",$0); }}' > $lmdir/text.filt #awk '{if(NF>=4){ printf("%s\n",$0); }}' > $lmdir/text.filt
# Envelop LM training data in context cues # Envelop LM training data in context cues
$irstbin/add-start-end.sh < $lexicon | awk '{if(NF>=3){ printf("%s\n",$0); }}' > $lmdir/lm_input add-start-end.sh < $lexicon | awk '{if(NF>=3){ printf("%s\n",$0); }}' > $lmdir/lm_input
wait wait
# Next, for each type of language model, create the corresponding FST # Next, for each type of language model, create the corresponding FST
...@@ -40,10 +52,10 @@ wait ...@@ -40,10 +52,10 @@ wait
echo "Preparing language models for test" echo "Preparing language models for test"
# Create Ngram table # Create Ngram table
$irstbin/ngt -i=$lmdir/lm_input -n=$ngram -o=$lmdir/train${ngram}.ngt -b=yes ngt -i=$lmdir/lm_input -n=$ngram -o=$lmdir/train${ngram}.ngt -b=yes
wait wait
# Estimate trigram and quadrigram models in ARPA format # Estimate trigram and quadrigram models in ARPA format
$irstbin/tlm -tr=$lmdir/train${ngram}.ngt -n=$ngram -lm=wb -o=$lmdir/train${ngram}.arpa tlm -tr=$lmdir/train${ngram}.ngt -n=$ngram -lm=wb -o=$lmdir/train${ngram}.arpa
wait wait
...@@ -76,4 +88,4 @@ utils/validate_lang.pl $test || exit 1; ...@@ -76,4 +88,4 @@ utils/validate_lang.pl $test || exit 1;
echo "Succeeded in formatting data." echo "Succeeded in formatting data."
exit 0; exit 0;
#rm -rf $tmpdir #rm -rf $tmpdir
#rm -f $ccs #rm -f $ccs
\ No newline at end of file
export KALDI_ROOT=`pwd`/../../.. export KALDI_ROOT=`pwd`/../../..
# Source the optional tools/env.sh when it exists. The paths are quoted so a
# KALDI_ROOT containing spaces or glob characters is not word-split.
[ -f "$KALDI_ROOT/tools/env.sh" ] && . "$KALDI_ROOT/tools/env.sh"
export PATH=$PWD/utils/:$KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/src/fstbin/:$KALDI_ROOT/src/gmmbin/:$KALDI_ROOT/src/featbin/:$KALDI_ROOT/src/lm/:$KALDI_ROOT/src/sgmmbin/:$KALDI_ROOT/src/sgmm2bin/:$KALDI_ROOT/src/fgmmbin/:$KALDI_ROOT/src/latbin/:$KALDI_ROOT/src/nnetbin:$KALDI_ROOT/src/nnet2bin/:$KALDI_ROOT/src/kwsbin:$PWD:$PATH export PATH=$PWD/utils/:$KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/src/fstbin/:$KALDI_ROOT/src/gmmbin/:$KALDI_ROOT/src/featbin/:$KALDI_ROOT/src/lm/:$KALDI_ROOT/src/sgmmbin/:$KALDI_ROOT/src/sgmm2bin/:$KALDI_ROOT/src/fgmmbin/:$KALDI_ROOT/src/latbin/:$KALDI_ROOT/src/nnetbin:$KALDI_ROOT/src/nnet2bin/:$KALDI_ROOT/src/kwsbin:$PWD:$PATH
export LC_ALL=C export LC_ALL=C
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment