Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Open sidebar
Abdelwahab HEBA
kaldi_2015
Commits
6423ac8d
Commit
6423ac8d
authored
Jul 22, 2015
by
Jan Trmal
Browse files
Adding IRSTLM presence checks
parent
0b52c1d5
Changes
16
Show whitespace changes
Inline
Side-by-side
Showing
16 changed files
with
177 additions
and
28 deletions
+177
-28
egs/ami/s5/run_prepare_shared.sh
egs/ami/s5/run_prepare_shared.sh
+13
-0
egs/aurora4/s5/local/aurora4_data_prep.sh
egs/aurora4/s5/local/aurora4_data_prep.sh
+13
-1
egs/aurora4/s5/local/cstr_wsj_data_prep.sh
egs/aurora4/s5/local/cstr_wsj_data_prep.sh
+13
-1
egs/babel/s5/local/cstr_wsj_data_prep.sh
egs/babel/s5/local/cstr_wsj_data_prep.sh
+13
-1
egs/chime3/s5/local/clean_wsj0_data_prep.sh
egs/chime3/s5/local/clean_wsj0_data_prep.sh
+13
-1
egs/chime_wsj0/s5/local/clean_wsj0_data_prep.sh
egs/chime_wsj0/s5/local/clean_wsj0_data_prep.sh
+13
-1
egs/farsdat/s5/local/farsdat_prepare_dict.sh
egs/farsdat/s5/local/farsdat_prepare_dict.sh
+4
-4
egs/gp/s1/local/gp_prep_lms_edin.sh
egs/gp/s1/local/gp_prep_lms_edin.sh
+13
-0
egs/gp/s5/local/gp_format_lm.sh
egs/gp/s5/local/gp_format_lm.sh
+13
-0
egs/swbd/s5/run_edin.sh
egs/swbd/s5/run_edin.sh
+15
-1
egs/swbd/s5b/run.sh
egs/swbd/s5b/run.sh
+14
-1
egs/timit/s4/local/timit_data_prep.sh
egs/timit/s4/local/timit_data_prep.sh
+4
-4
egs/timit/s5/local/timit_prepare_dict.sh
egs/timit/s5/local/timit_prepare_dict.sh
+4
-4
egs/wsj/s5/local/cstr_wsj_data_prep.sh
egs/wsj/s5/local/cstr_wsj_data_prep.sh
+13
-1
egs/wsj/s5/local/wsj_data_prep.sh
egs/wsj/s5/local/wsj_data_prep.sh
+15
-4
egs/wsj/s5/local/wsj_train_lms.sh
egs/wsj/s5/local/wsj_train_lms.sh
+4
-4
No files found.
egs/ami/s5/run_prepare_shared.sh
View file @
6423ac8d
...
...
@@ -27,6 +27,19 @@ esac
# Load previous / store the new AMI_DIR location,
[
-r
conf/ami_dir
]
&&
AMI_DIR
=
$(
cat
conf/ami_dir
)
||
echo
$AMI_DIR
>
conf/ami_dir
if
[
-z
$IRSTLM
]
;
then
export
IRSTLM
=
$KALDI_ROOT
/tools/irstlm/
fi
export
PATH
=
${
PATH
}
:
$IRSTLM
/bin
if
!
command
-v
prune-lm
>
/dev/null 2>&1
;
then
echo
"
$0
: Error: the IRSTLM is not available or compiled"
>
&2
echo
"
$0
: Error: We used to install it by default, but."
>
&2
echo
"
$0
: Error: this is no longer the case."
>
&2
echo
"
$0
: Error: To install it, go to
$KALDI_ROOT
/tools"
>
&2
echo
"
$0
: Error: and run extras/install_irstlm.sh"
>
&2
exit
1
fi
# Set bash to 'debug' mode, it will exit on :
# -e 'error', -u 'undefined variable', -o ... 'error in pipeline', -x 'print commands',
set
-e
...
...
egs/aurora4/s5/local/aurora4_data_prep.sh
View file @
6423ac8d
...
...
@@ -22,13 +22,25 @@ local=`pwd`/local
utils
=
`
pwd
`
/utils
.
./path.sh
# Needed for KALDI_ROOT
export
PATH
=
$PATH
:
$KALDI_ROOT
/tools/irstlm/bin
sph2pipe
=
$KALDI_ROOT
/tools/sph2pipe_v2.5/sph2pipe
if
[
!
-x
$sph2pipe
]
;
then
echo
"Could not find (or execute) the sph2pipe program at
$sph2pipe
"
;
exit
1
;
fi
if
[
-z
$IRSTLM
]
;
then
export
IRSTLM
=
$KALDI_ROOT
/tools/irstlm/
fi
export
PATH
=
${
PATH
}
:
$IRSTLM
/bin
if
!
command
-v
prune-lm
>
/dev/null 2>&1
;
then
echo
"
$0
: Error: the IRSTLM is not available or compiled"
>
&2
echo
"
$0
: Error: We used to install it by default, but."
>
&2
echo
"
$0
: Error: this is no longer the case."
>
&2
echo
"
$0
: Error: To install it, go to
$KALDI_ROOT
/tools"
>
&2
echo
"
$0
: Error: and run extras/install_irstlm.sh"
>
&2
exit
1
fi
cd
$dir
# SI-84 clean training data
...
...
egs/aurora4/s5/local/cstr_wsj_data_prep.sh
View file @
6423ac8d
...
...
@@ -25,13 +25,25 @@ local=`pwd`/local
utils
=
`
pwd
`
/utils
.
./path.sh
# Needed for KALDI_ROOT
export
PATH
=
$PATH
:
$KALDI_ROOT
/tools/irstlm/bin
sph2pipe
=
$KALDI_ROOT
/tools/sph2pipe_v2.5/sph2pipe
if
[
!
-x
$sph2pipe
]
;
then
echo
"Could not find (or execute) the sph2pipe program at
$sph2pipe
"
;
exit
1
;
fi
if
[
-z
$IRSTLM
]
;
then
export
IRSTLM
=
$KALDI_ROOT
/tools/irstlm/
fi
export
PATH
=
${
PATH
}
:
$IRSTLM
/bin
if
!
command
-v
prune-lm
>
/dev/null 2>&1
;
then
echo
"
$0
: Error: the IRSTLM is not available or compiled"
>
&2
echo
"
$0
: Error: We used to install it by default, but."
>
&2
echo
"
$0
: Error: this is no longer the case."
>
&2
echo
"
$0
: Error: To install it, go to
$KALDI_ROOT
/tools"
>
&2
echo
"
$0
: Error: and run extras/install_irstlm.sh"
>
&2
exit
1
fi
cd
$dir
# This version for SI-84
...
...
egs/babel/s5/local/cstr_wsj_data_prep.sh
View file @
6423ac8d
...
...
@@ -25,13 +25,25 @@ local=`pwd`/local
utils
=
`
pwd
`
/utils
.
./path.sh
# Needed for KALDI_ROOT
export
PATH
=
$PATH
:
$KALDI_ROOT
/tools/irstlm/bin
sph2pipe
=
$KALDI_ROOT
/tools/sph2pipe_v2.5/sph2pipe
if
[
!
-x
$sph2pipe
]
;
then
echo
"Could not find (or execute) the sph2pipe program at
$sph2pipe
"
;
exit
1
;
fi
if
[
-z
$IRSTLM
]
;
then
export
IRSTLM
=
$KALDI_ROOT
/tools/irstlm/
fi
export
PATH
=
${
PATH
}
:
$IRSTLM
/bin
if
!
command
-v
prune-lm
>
/dev/null 2>&1
;
then
echo
"
$0
: Error: the IRSTLM is not available or compiled"
>
&2
echo
"
$0
: Error: We used to install it by default, but."
>
&2
echo
"
$0
: Error: this is no longer the case."
>
&2
echo
"
$0
: Error: To install it, go to
$KALDI_ROOT
/tools"
>
&2
echo
"
$0
: Error: and run extras/install_irstlm.sh"
>
&2
exit
1
fi
cd
$dir
# This version for SI-84
...
...
egs/chime3/s5/local/clean_wsj0_data_prep.sh
View file @
6423ac8d
...
...
@@ -28,13 +28,25 @@ local=`pwd`/local
utils
=
`
pwd
`
/utils
.
./path.sh
# Needed for KALDI_ROOT
export
PATH
=
$PATH
:
$KALDI_ROOT
/tools/irstlm/bin
sph2pipe
=
$KALDI_ROOT
/tools/sph2pipe_v2.5/sph2pipe
if
[
!
-x
$sph2pipe
]
;
then
echo
"Could not find (or execute) the sph2pipe program at
$sph2pipe
"
;
exit
1
;
fi
if
[
-z
$IRSTLM
]
;
then
export
IRSTLM
=
$KALDI_ROOT
/tools/irstlm/
fi
export
PATH
=
${
PATH
}
:
$IRSTLM
/bin
if
!
command
-v
prune-lm
>
/dev/null 2>&1
;
then
echo
"
$0
: Error: the IRSTLM is not available or compiled"
>
&2
echo
"
$0
: Error: We used to install it by default, but."
>
&2
echo
"
$0
: Error: this is no longer the case."
>
&2
echo
"
$0
: Error: To install it, go to
$KALDI_ROOT
/tools"
>
&2
echo
"
$0
: Error: and run extras/install_irstlm.sh"
>
&2
exit
1
fi
cd
$dir
# This version for SI-84
...
...
egs/chime_wsj0/s5/local/clean_wsj0_data_prep.sh
View file @
6423ac8d
...
...
@@ -25,13 +25,25 @@ local=`pwd`/local
utils
=
`
pwd
`
/utils
.
./path.sh
# Needed for KALDI_ROOT
export
PATH
=
$PATH
:
$KALDI_ROOT
/tools/irstlm/bin
sph2pipe
=
$KALDI_ROOT
/tools/sph2pipe_v2.5/sph2pipe
if
[
!
-x
$sph2pipe
]
;
then
echo
"Could not find (or execute) the sph2pipe program at
$sph2pipe
"
;
exit
1
;
fi
if
[
-z
$IRSTLM
]
;
then
export
IRSTLM
=
$KALDI_ROOT
/tools/irstlm/
fi
export
PATH
=
${
PATH
}
:
$IRSTLM
/bin
if
!
command
-v
prune-lm
>
/dev/null 2>&1
;
then
echo
"
$0
: Error: the IRSTLM is not available or compiled"
>
&2
echo
"
$0
: Error: We used to install it by default, but."
>
&2
echo
"
$0
: Error: this is no longer the case."
>
&2
echo
"
$0
: Error: To install it, go to
$KALDI_ROOT
/tools"
>
&2
echo
"
$0
: Error: and run extras/install_irstlm.sh"
>
&2
exit
1
fi
cd
$dir
# This version for SI-84
...
...
egs/farsdat/s5/local/farsdat_prepare_dict.sh
View file @
6423ac8d
...
...
@@ -63,8 +63,8 @@ cat $dir/nonsilence_phones.txt | perl -e 'while(<>){ foreach $p (split(" ", $_))
if
[
-z
$IRSTLM
]
;
then
export
IRSTLM
=
$KALDI_ROOT
/tools/irstlm/
fi
if
[
!
-f
$IRSTLM
/bin/dict
]
;
then
export
PATH
=
${
PATH
}
:
$IRSTLM
/bin
if
!
command
-v
prune-lm
>
/dev/null 2>&1
;
then
echo
"
$0
: Error: the IRSTLM is not available or compiled"
>
&2
echo
"
$0
: Error: We used to install it by default, but."
>
&2
echo
"
$0
: Error: this is no longer the case."
>
&2
...
...
@@ -76,10 +76,10 @@ fi
cut
-d
' '
-f2-
$srcdir
/text |
sed
-e
's:^:<s> :'
-e
's:$: </s>:'
\
>
$srcdir
/lm_train
$IRSTLM
/bin/
build-lm.sh
-i
$srcdir
/lm_train
-n
2
\
build-lm.sh
-i
$srcdir
/lm_train
-n
2
\
-o
$tmpdir
/lm_phone_bg.ilm.gz
$IRSTLM
/bin/
compile-lm
$tmpdir
/lm_phone_bg.ilm.gz
-t
=
yes
/dev/stdout |
\
compile-lm
$tmpdir
/lm_phone_bg.ilm.gz
-t
=
yes
/dev/stdout |
\
grep
-v
unk |
gzip
-c
>
$lmdir
/lm_phone_bg.arpa.gz
...
...
egs/gp/s1/local/gp_prep_lms_edin.sh
View file @
6423ac8d
...
...
@@ -52,6 +52,19 @@ do
esac
done
if
[
-z
$IRSTLM
]
;
then
export
IRSTLM
=
$KALDI_ROOT
/tools/irstlm/
fi
export
PATH
=
${
PATH
}
:
$IRSTLM
/bin
if
!
command
-v
prune-lm
>
/dev/null 2>&1
;
then
echo
"
$0
: Error: the IRSTLM is not available or compiled"
>
&2
echo
"
$0
: Error: We used to install it by default, but."
>
&2
echo
"
$0
: Error: this is no longer the case."
>
&2
echo
"
$0
: Error: To install it, go to
$KALDI_ROOT
/tools"
>
&2
echo
"
$0
: Error: and run extras/install_irstlm.sh"
>
&2
exit
1
fi
cd
$WDIR
;
tmpdir
=
$(
mktemp
-d
)
;
trap
'rm -rf "$tmpdir"'
EXIT
...
...
egs/gp/s5/local/gp_format_lm.sh
View file @
6423ac8d
...
...
@@ -49,6 +49,19 @@ while [ $# -gt 0 ]; do
esac
done
if
[
-z
$IRSTLM
]
;
then
export
IRSTLM
=
$KALDI_ROOT
/tools/irstlm/
fi
export
PATH
=
${
PATH
}
:
$IRSTLM
/bin
if
!
command
-v
prune-lm
>
/dev/null 2>&1
;
then
echo
"
$0
: Error: the IRSTLM is not available or compiled"
>
&2
echo
"
$0
: Error: We used to install it by default, but."
>
&2
echo
"
$0
: Error: this is no longer the case."
>
&2
echo
"
$0
: Error: To install it, go to
$KALDI_ROOT
/tools"
>
&2
echo
"
$0
: Error: and run extras/install_irstlm.sh"
>
&2
exit
1
fi
for
L
in
$LANGUAGES
;
do
lm
=
$LMDIR
/
${
L
}
.3gram.lm.gz
[
-f
$lm
]
||
{
echo
"LM '
$lm
' not found"
;
exit
1
;
}
...
...
egs/swbd/s5/run_edin.sh
View file @
6423ac8d
...
...
@@ -12,6 +12,19 @@ exit 1;
.
cmd.sh
.
path.sh
if
[
-z
$IRSTLM
]
;
then
export
IRSTLM
=
$KALDI_ROOT
/tools/irstlm/
fi
export
PATH
=
${
PATH
}
:
$IRSTLM
/bin
if
!
command
-v
prune-lm
>
/dev/null 2>&1
;
then
echo
"
$0
: Error: the IRSTLM is not available or compiled"
>
&2
echo
"
$0
: Error: We used to install it by default, but."
>
&2
echo
"
$0
: Error: this is no longer the case."
>
&2
echo
"
$0
: Error: To install it, go to
$KALDI_ROOT
/tools"
>
&2
echo
"
$0
: Error: and run extras/install_irstlm.sh"
>
&2
exit
1
fi
# Data prep
# Here we make some Edinburgh-specific changes from the Kaldi recipe in
# trunk/egs/swbd/s5 (rev. 1841). The major differences are that everything is
...
...
@@ -47,7 +60,8 @@ utils/format_lm_sri.sh --srilm-opts "$srilm_opts" \
data/lang
$LM
data/local/dict/lexicon.txt data/lang_sw1_fsh_tg
# For some funny reason we are still using IRSTLM for doing LM pruning :)
prune-lm
--threshold
=
1e-7 data/local/lm/sw1_fsh.o3g.kn.gz /dev/stdout
\
prune-lm
--threshold
=
1e-7 data/local/lm/sw1_fsh.o3g.kn.gz
\
/dev/stdout
\
|
gzip
-c
>
data/local/lm/sw1_fsh.o3g.pr1-7.kn.gz
LM
=
data/local/lm/sw1_fsh.o3g.pr1-7.kn.gz
utils/format_lm_sri.sh
--srilm-opts
"
$srilm_opts
"
\
...
...
egs/swbd/s5b/run.sh
View file @
6423ac8d
...
...
@@ -18,6 +18,19 @@ set -e # exit on error
# want to store MFCC features.
mfccdir
=
mfcc
if
[
-z
$IRSTLM
]
;
then
export
IRSTLM
=
$KALDI_ROOT
/tools/irstlm/
fi
export
PATH
=
${
PATH
}
:
$IRSTLM
/bin
if
!
command
-v
prune-lm
>
/dev/null 2>&1
;
then
echo
"
$0
: Error: the IRSTLM is not available or compiled"
>
&2
echo
"
$0
: Error: We used to install it by default, but."
>
&2
echo
"
$0
: Error: this is no longer the case."
>
&2
echo
"
$0
: Error: To install it, go to
$KALDI_ROOT
/tools"
>
&2
echo
"
$0
: Error: and run extras/install_irstlm.sh"
>
&2
exit
1
fi
# Prepare Switchboard data. This command can also take a second optional argument
# which specifies the directory to Switchboard documentations. Specifically, if
...
...
@@ -54,6 +67,7 @@ local/swbd1_train_lms.sh data/local/train/text \
data/local/dict/lexicon.txt data/local/lm
$fisher_dirs
# We don't really need all these options for SRILM, since the LM training script
# does some of the same processings (e.g. -subset -tolower)
for
order
in
3 4
;
do
lm_suffix
=
"tg"
[
$order
-eq
3
]
||
lm_suffix
=
"fg"
...
...
@@ -66,7 +80,6 @@ for order in 3 4; do
utils/build_const_arpa_lm.sh
$LM
data/lang data/lang_sw1_fsh_
$lm_suffix
# For some funny reason we are still using IRSTLM for doing LM pruning :)
export
PATH
=
$PATH
:../../../tools/irstlm/bin/
prune-lm
--threshold
=
1e-7 data/local/lm/sw1_fsh.o
${
order
}
g.kn.gz /dev/stdout
\
|
gzip
-c
>
data/local/lm/sw1_fsh.o
${
order
}
g.pr1-7.kn.gz
||
exit
1
LM
=
data/local/lm/sw1_fsh.o
${
order
}
g.pr1-7.kn.gz
...
...
egs/timit/s4/local/timit_data_prep.sh
View file @
6423ac8d
...
...
@@ -100,8 +100,8 @@ cut -f1 data/local/lexicon.txt \
if
[
-z
$IRSTLM
]
;
then
export
IRSTLM
=
$KALDI_ROOT
/tools/irstlm/
fi
if
[
!
-f
$IRSTLM
/bin/dict
]
;
then
export
PATH
=
${
PATH
}
:
$IRSTLM
/bin
if
!
command
-v
prune-lm
>
/dev/null 2>&1
;
then
echo
"
$0
: Error: the IRSTLM is not available or compiled"
>
&2
echo
"
$0
: Error: We used to install it by default, but."
>
&2
echo
"
$0
: Error: this is no longer the case."
>
&2
...
...
@@ -116,10 +116,10 @@ cut -d' ' -f2- $srcdir/text | sed -e 's:^:<s> :' -e 's:$: </s>:' \
cut
-d
' '
-f2-
data/local/train.trans2 |
sed
-e
's:^:<s> :'
-e
's:$: </s>:'
\
>
data/local/lm_train.txt
$IRSTLM
/bin/
build-lm.sh
-i
data/local/lm_train.txt
-n
2
\
build-lm.sh
-i
data/local/lm_train.txt
-n
2
\
-o
data/local/lm_phone_bg.ilm.gz
$IRSTLM
/bin/
compile-lm data/local/lm_phone_bg.ilm.gz
--text
yes
/dev/stdout
\
compile-lm data/local/lm_phone_bg.ilm.gz
--text
yes
/dev/stdout
\
|
grep
-v
unk |
gzip
-c
>
data/local/lm_phone_bg.arpa.gz
)
>
& data/prepare_lm.log
...
...
egs/timit/s5/local/timit_prepare_dict.sh
View file @
6423ac8d
...
...
@@ -64,8 +64,8 @@ cat $dir/nonsilence_phones.txt | perl -e 'while(<>){ foreach $p (split(" ", $_))
if
[
-z
$IRSTLM
]
;
then
export
IRSTLM
=
$KALDI_ROOT
/tools/irstlm/
fi
if
[
!
-f
$IRSTLM
/bin/dict
]
;
then
export
PATH
=
${
PATH
}
:
$IRSTLM
/bin
if
!
command
-v
prune-lm
>
/dev/null 2>&1
;
then
echo
"
$0
: Error: the IRSTLM is not available or compiled"
>
&2
echo
"
$0
: Error: We used to install it by default, but."
>
&2
echo
"
$0
: Error: this is no longer the case."
>
&2
...
...
@@ -77,10 +77,10 @@ fi
cut
-d
' '
-f2-
$srcdir
/train.text |
sed
-e
's:^:<s> :'
-e
's:$: </s>:'
\
>
$srcdir
/lm_train.text
$IRSTLM
/bin/
build-lm.sh
-i
$srcdir
/lm_train.text
-n
2
\
build-lm.sh
-i
$srcdir
/lm_train.text
-n
2
\
-o
$tmpdir
/lm_phone_bg.ilm.gz
$IRSTLM
/bin/
compile-lm
$tmpdir
/lm_phone_bg.ilm.gz
-t
=
yes
/dev/stdout |
\
compile-lm
$tmpdir
/lm_phone_bg.ilm.gz
-t
=
yes
/dev/stdout |
\
grep
-v
unk |
gzip
-c
>
$lmdir
/lm_phone_bg.arpa.gz
echo
"Dictionary & language model preparation succeeded"
egs/wsj/s5/local/cstr_wsj_data_prep.sh
View file @
6423ac8d
...
...
@@ -25,13 +25,25 @@ local=`pwd`/local
utils
=
`
pwd
`
/utils
.
./path.sh
# Needed for KALDI_ROOT
export
PATH
=
$PATH
:
$KALDI_ROOT
/tools/irstlm/bin
sph2pipe
=
$KALDI_ROOT
/tools/sph2pipe_v2.5/sph2pipe
if
[
!
-x
$sph2pipe
]
;
then
echo
"Could not find (or execute) the sph2pipe program at
$sph2pipe
"
;
exit
1
;
fi
if
[
-z
$IRSTLM
]
;
then
export
IRSTLM
=
$KALDI_ROOT
/tools/irstlm/
fi
export
PATH
=
${
PATH
}
:
$IRSTLM
/bin
if
!
command
-v
prune-lm
>
/dev/null 2>&1
;
then
echo
"
$0
: Error: the IRSTLM is not available or compiled"
>
&2
echo
"
$0
: Error: We used to install it by default, but."
>
&2
echo
"
$0
: Error: this is no longer the case."
>
&2
echo
"
$0
: Error: To install it, go to
$KALDI_ROOT
/tools"
>
&2
echo
"
$0
: Error: and run extras/install_irstlm.sh"
>
&2
exit
1
fi
cd
$dir
# This version for SI-84
...
...
egs/wsj/s5/local/wsj_data_prep.sh
View file @
6423ac8d
...
...
@@ -17,15 +17,26 @@ local=`pwd`/local
utils
=
`
pwd
`
/utils
.
./path.sh
# Needed for KALDI_ROOT
export
PATH
=
$PATH
:
$KALDI_ROOT
/tools/irstlm/bin
sph2pipe
=
$KALDI_ROOT
/tools/sph2pipe_v2.5/sph2pipe
if
[
!
-x
$sph2pipe
]
;
then
echo
"Could not find (or execute) the sph2pipe program at
$sph2pipe
"
;
exit
1
;
fi
cd
$dir
if
[
-z
$IRSTLM
]
;
then
export
IRSTLM
=
$KALDI_ROOT
/tools/irstlm/
fi
export
PATH
=
${
PATH
}
:
$IRSTLM
/bin
if
!
command
-v
prune-lm
>
/dev/null 2>&1
;
then
echo
"
$0
: Error: the IRSTLM is not available or compiled"
>
&2
echo
"
$0
: Error: We used to install it by default, but."
>
&2
echo
"
$0
: Error: this is no longer the case."
>
&2
echo
"
$0
: Error: To install it, go to
$KALDI_ROOT
/tools"
>
&2
echo
"
$0
: Error: and run extras/install_irstlm.sh"
>
&2
exit
1
fi
cd
$dir
# Make directory of links to the WSJ disks such as 11-13.1. This relies on the command
# line arguments being absolute pathnames.
rm
-r
links/ 2>/dev/null
...
...
egs/wsj/s5/local/wsj_train_lms.sh
View file @
6423ac8d
...
...
@@ -178,8 +178,8 @@ ngram -lm $sdir/srilm.o3g.pr7.kn.gz -ppl $sdir/cleaned.heldout
if
[
-z
$IRSTLM
]
;
then
export
IRSTLM
=
$KALDI_ROOT
/tools/irstlm/
fi
if
[
!
-f
$IRSTLM
/bin/dict
]
;
then
export
PATH
=
${
PATH
}
:
$IRSTLM
/bin
if
!
command
-v
prune-lm
>
/dev/null 2>&1
;
then
echo
"
$0
: Error: the IRSTLM is not available or compiled"
>
&2
echo
"
$0
: Error: We used to install it by default, but."
>
&2
echo
"
$0
: Error: this is no longer the case."
>
&2
...
...
@@ -193,12 +193,12 @@ mkdir $idir
gunzip
-c
$srcdir
/cleaned.gz |
tail
-n
+
$heldout_sent
|
$IRSTLM
/scripts/add-start-end.sh |
\
gzip
-c
>
$idir
/train.gz
$IRSTLM
/bin/
dict
-i
=
WSJ.cleaned.irstlm.txt
-o
=
dico
-f
=
y
-sort
=
no
dict
-i
=
WSJ.cleaned.irstlm.txt
-o
=
dico
-f
=
y
-sort
=
no
cat
dico | gawk
'BEGIN{while (getline<"vocab.20k.nooov") v[$1]=1; print "DICTIONARY 0 "length(v);}FNR>1{if ($1 in v)\
{print $0;}}'
>
vocab.irstlm.20k
$IRSTLM
/bin/
build-lm.sh
-i
"gunzip -c
$idir
/train.gz"
-o
$idir
/lm_3gram.gz
-p
yes
\
build-lm.sh
-i
"gunzip -c
$idir
/train.gz"
-o
$idir
/lm_3gram.gz
-p
yes
\
-n
3
-s
improved-kneser-ney
-b
yes
# Testing perplexity with SRILM tools:
ngram
-lm
$idir
/lm_3gram.gz
-ppl
$sdir
/cleaned.heldout
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment