Commit 2bd491d1 authored by Dan Povey's avatar Dan Povey
Browse files

trunk: various script changes to better support Mac OS X. Avoiding the '-T'...

trunk: various script changes to better support Mac OS X.  Avoiding the '-T' option to 'cp', and the '>/dev/stderr' syntax in awk, which is only supported by GNU awk.  Also providing an alternative to 'du -b' for getting file sizes, falling back to 'stat' on Mac.

git-svn-id: https://svn.code.sf.net/p/kaldi/code/trunk@4877 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
parent 2884f22c
......@@ -38,7 +38,8 @@ echo Preparing language models for test
for lm_suffix in bg tgpr tg bg_5k tgpr_5k tg_5k; do
test=data/lang_test_${lm_suffix}
cp -rT data/lang $test
mkdir -p $test
cp -r data/lang/* $test
gunzip -c $lmdir/lm_${lm_suffix}.arpa.gz | \
utils/find_arpa_oovs.pl $test/words.txt > $tmpdir/oovs_${lm_suffix}.txt
......
......@@ -47,8 +47,8 @@ if [ $stage -le 3 ]; then # create testing fbank data.
featdir=`pwd`/mfcc
fbank_conf=conf/fbank_40.conf
for x in test_eval92 test_eval93 test_dev93; do
cp -rT data/$x data/${x}_fbank
rm -r ${x}_fbank/split* || true
mkdir -p data/${x}_fbank
cp data/$x/* data/${x}_fbank || true
steps/make_fbank.sh --fbank-config "$fbank_conf" --nj 8 \
--cmd "$train_cmd" data/${x}_fbank exp/make_fbank/$x $featdir || exit 1;
steps/compute_cmvn_stats.sh data/${x}_fbank exp/make_fbank/$x $featdir || exit 1;
......
......@@ -11,7 +11,8 @@ steps/train_raw_sat.sh --cmd "$train_cmd" \
mfccdir=mfcc
for x in test_eval92 test_eval93 test_dev93 ; do
y=${x}_utt
cp -rT data/$x data/$y
mkdir -p data/$y
cp data/$x/* data/$y || true
cat data/$x/utt2spk | awk '{print $1, $1;}' > data/$y/utt2spk;
cp data/$y/utt2spk data/$y/spk2utt;
steps/compute_cmvn_stats.sh data/$y exp/make_mfcc/$y $mfccdir || exit 1;
......
......@@ -8,7 +8,8 @@ mkdir -p data/lang_test
arpa_lm=data/local/lm/3gram-mincount/lm_unpruned.gz
[ ! -f $arpa_lm ] && echo No such file $arpa_lm && exit 1;
cp -rT data/lang data/lang_test
mkdir -p data/lang_test
cp -r data/lang/* data/lang_test
# grep -v '<s> <s>' etc. is only for future-proofing this script. Our
# LM doesn't have these "invalid combinations". These can cause
......
......@@ -38,7 +38,8 @@ echo Preparing language models for test
for lm_suffix in bg tgpr tg bg_5k tgpr_5k tg_5k; do
test=data/lang_test_${lm_suffix}
cp -rT data/lang $test
mkdir -p $test
cp -r data/lang/* $test
gunzip -c $lmdir/lm_${lm_suffix}.arpa.gz | \
utils/find_arpa_oovs.pl $test/words.txt > $tmpdir/oovs_${lm_suffix}.txt
......
......@@ -9,7 +9,8 @@ mkdir -p data/lang_test
arpa_lm=data/local/lm/3gram-mincount/lm_unpruned.gz
[ ! -f $arpa_lm ] && echo No such file $arpa_lm && exit 1;
cp -rT data/lang data/lang_test
mkdir -p data/lang_test
cp -r data/lang/* data/lang_test
# grep -v '<s> <s>' etc. is only for future-proofing this script. Our
# LM doesn't have these "invalid combinations". These can cause
......
......@@ -8,7 +8,8 @@ mkdir -p data/lang_test
arpa_lm=data/local/lm/3gram-mincount/lm_unpruned.gz
[ ! -f $arpa_lm ] && echo No such file $arpa_lm && exit 1;
cp -rT data/lang data/lang_test
mkdir -p data/lang_test
cp -r data/lang/* data/lang_test
# grep -v '<s> <s>' etc. is only for future-proofing this script. Our
# LM doesn't have these "invalid combinations". These can cause
......
......@@ -8,7 +8,8 @@ mkdir -p data/lang_test
arpa_lm=data/local/lm/3gram-mincount/lm_unpruned.gz
[ ! -f $arpa_lm ] && echo No such file $arpa_lm && exit 1;
cp -rT data/lang data/lang_test
mkdir -p data/lang_test
cp -r data/lang/* data/lang_test
# grep -v '<s> <s>' etc. is only for future-proofing this script. Our
# LM doesn't have these "invalid combinations". These can cause
......
......@@ -8,7 +8,8 @@ mkdir -p data/lang_test_fsh
arpa_lm=data/local/lm/3gram-mincount/lm_unpruned.gz
[ ! -f $arpa_lm ] && echo No such file $arpa_lm && exit 1;
cp -rT data/lang data/lang_test_fsh
mkdir -p data/lang_test_fsh
cp -r data/lang/* data/lang_test_fsh
# grep -v '<s> <s>' etc. is only for future-proofing this script. Our
# LM doesn't have these "invalid combinations". These can cause
......
......@@ -33,7 +33,10 @@ function check_and_download () {
echo "Downloading file '$fname' into '$dst_dir'..."
expect_size="${sizes["$fname"]}"
if [[ -s $dst_dir/$fname ]]; then
fsize=$(du -b $dst_dir/$fname | awk '{print $1}')
# In the following statement, the first version (du -b) works on Linux, and the
# part after '||' (stat -f %z) works on Mac OS X.
f=$dst_dir/$fname
fsize=$(set -o pipefail; du -b $f 2>/dev/null | awk '{print $1}' || stat '-f %z' $f)
if [[ "$fsize" -eq "$expect_size" ]]; then
echo "'$fname' already exists and appears to be complete"
return 0
......@@ -45,7 +48,10 @@ function check_and_download () {
echo "Error while trying to download $fname!"
return 1
}
fsize=$(du -b $dst_dir/$fname | awk '{print $1}')
f=$dst_dir/$fname
# In the following statement, the first version (du -b) works on Linux, and the part
# after '||' (stat -f %z) works on Mac OS X.
fsize=$(set -o pipefail; du -b $f 2>/dev/null | awk '{print $1}' || stat '-f %z' $f)
[[ "$fsize" -eq "$expect_size" ]] || { echo "$fname: file size mismatch!"; return 1; }
return 0
}
......
......@@ -47,7 +47,8 @@ for lm_suffix in tgsmall tgmed; do
# tglarge is prepared by a separate command, called from run.sh; we don't
# want to compile G.fst for tglarge, as it takes a while.
test=${src_dir}_test_${lm_suffix}
cp -rT ${src_dir} $test
mkdir -p $test
cp -r ${src_dir}/* $test
gunzip -c $lm_dir/lm_${lm_suffix}.arpa.gz |\
utils/find_arpa_oovs.pl $test/words.txt > $tmpdir/oovs_${lm_suffix}.txt || exit 1
......
......@@ -54,8 +54,8 @@ if [ $stage -le 3 ]; then
fbank_conf=conf/fbank_40.conf
echo "--num-mel-bins=40" > $fbank_conf
for x in test_mar87 test_oct87 test_feb89 test_oct89 test_feb91 test_sep92 train; do
cp -rT data/$x data/${x}_fbank
rm -r ${x}_fbank/split* || true
mkdir -p data/${x}_fbank
cp data/$x/* data/${x}_fbank || true
steps/make_fbank.sh --fbank-config "$fbank_conf" --nj 8 \
--cmd "run.pl" data/${x}_fbank exp/make_fbank/$x $featdir || exit 1;
steps/compute_cmvn_stats.sh data/${x}_fbank exp/make_fbank/$x $featdir || exit 1;
......
......@@ -62,8 +62,8 @@ if [ $stage -le 3 ]; then
fbank_conf=conf/fbank_40.conf
echo "--num-mel-bins=40" > $fbank_conf
for x in test_mar87 test_oct87 test_feb89 test_oct89 test_feb91 test_sep92 train; do
cp -rT data/$x data/${x}_fbank
rm -r ${x}_fbank/split* || true
mkdir -p data/${x}_fbank
cp data/$x/* data/${x}_fbank || true
steps/make_fbank.sh --fbank-config "$fbank_conf" --nj 8 \
--cmd "run.pl" data/${x}_fbank exp/make_fbank/$x $featdir || exit 1;
steps/compute_cmvn_stats.sh data/${x}_fbank exp/make_fbank/$x $featdir || exit 1;
......
......@@ -10,8 +10,8 @@ tmpdir=data/local/tmp
. ./path.sh || exit 1; # for KALDI_ROOT
cp -rT data/lang data/lang_ug
mkdir -p data/lang_ug
cp -r data/lang/* data/lang_ug
rm -rf data/lang_ug/tmp
cat data/train/text | \
......
......@@ -11,7 +11,8 @@ set -e
# train linear vtln
steps/train_lvtln.sh --cmd "$train_cmd" 1800 9000 \
data/train data/lang exp/tri2a exp/tri3d
cp -rT data/train data/train_vtln
mkdir -p data/train_vtln
cp data/train/* data/train_vtln || true
cp exp/tri3d/final.warp data/train_vtln/spk2warp
steps/make_mfcc.sh --nj 8 --cmd "run.pl" data/train_vtln exp/make_mfcc/train_vtln $featdir
steps/compute_cmvn_stats.sh data/train_vtln exp/make_mfcc/train_vtln $featdir
......@@ -19,7 +20,8 @@ steps/compute_cmvn_stats.sh data/train_vtln exp/make_mfcc/train_vtln $featdir
steps/decode_lvtln.sh --config conf/decode.config --nj 20 --cmd "$decode_cmd" \
exp/tri3d/graph data/test exp/tri3d/decode
cp -rT data/test data/test_vtln
mkdir -p data/test_vtln
cp data/test/* data/test_vtln || true
cp exp/tri3d/decode/final.warp data/test_vtln/spk2warp
steps/make_mfcc.sh --nj 8 --cmd "run.pl" data/test_vtln exp/make_mfcc/test_vtln $featdir
steps/compute_cmvn_stats.sh data/test_vtln exp/make_mfcc/test_vtln $featdir
......
......@@ -9,7 +9,8 @@ set -e
steps/train_lvtln.sh --cmd "$train_cmd" 1800 9000 \
data/train data/lang exp/tri2b exp/tri3e
cp -rT data/train data/train_vtln
mkdir -p data/train_vtln
cp data/train/* data/train_vtln || true
cp exp/tri3e/final.warp data/train_vtln/spk2warp
steps/make_mfcc.sh --nj 8 --cmd "run.pl" data/train_vtln exp/make_mfcc/train_vtln $featdir
steps/compute_cmvn_stats.sh data/train_vtln exp/make_mfcc/train_vtln $featdir
......@@ -17,7 +18,8 @@ steps/compute_cmvn_stats.sh data/train_vtln exp/make_mfcc/train_vtln $featdir
steps/decode_lvtln.sh --config conf/decode.config --nj 20 --cmd "$decode_cmd" \
exp/tri3e/graph data/test exp/tri3e/decode
cp -rT data/test data/test_vtln
mkdir -p data/test_vtln
cp data/test/* data/test_vtln || true
cp exp/tri3e/decode/final.warp data/test_vtln/spk2warp
steps/make_mfcc.sh --nj 8 --cmd "run.pl" data/test_vtln exp/make_mfcc/test_vtln $featdir
steps/compute_cmvn_stats.sh data/test_vtln exp/make_mfcc/test_vtln $featdir
......
......@@ -47,8 +47,8 @@ if [ $stage -le 3 ]; then # create testing fbank data.
featdir=`pwd`/mfcc
fbank_conf=conf/fbank_40.conf
for x in test_eval92 test_eval93 test_dev93; do
cp -rT data/$x data/${x}_fbank
rm -r ${x}_fbank/split* || true
mkdir -p data/${x}_fbank
cp data/$x/* data/${x}_fbank || true
steps/make_fbank.sh --fbank-config "$fbank_conf" --nj 8 \
--cmd "$train_cmd" data/${x}_fbank exp/make_fbank/$x $featdir || exit 1;
steps/compute_cmvn_stats.sh data/${x}_fbank exp/make_fbank/$x $featdir || exit 1;
......
......@@ -72,8 +72,8 @@ if [ $stage -le 3 ]; then # create testing fbank data.
featdir=`pwd`/mfcc
fbank_conf=conf/fbank_40.conf
for x in test_eval92 test_eval93 test_dev93; do
cp -rT data/$x data/${x}_fbank
rm -r ${x}_fbank/split* || true
mkdir -p data/${x}_fbank
cp data/$x/* data/${x}_fbank || true
steps/make_fbank.sh --fbank-config "$fbank_conf" --nj 8 \
--cmd "$train_cmd" data/${x}_fbank exp/make_fbank/$x $featdir || exit 1;
steps/compute_cmvn_stats.sh data/${x}_fbank exp/make_fbank/$x $featdir || exit 1;
......
......@@ -11,7 +11,8 @@ steps/train_raw_sat.sh --cmd "$train_cmd" \
mfccdir=mfcc
for x in test_eval92 test_eval93 test_dev93 ; do
y=${x}_utt
cp -rT data/$x data/$y
mkdir -p data/$y
cp data/$x/* data/$y || true
cat data/$x/utt2spk | awk '{print $1, $1;}' > data/$y/utt2spk;
cp data/$y/utt2spk data/$y/spk2utt;
steps/compute_cmvn_stats.sh data/$y exp/make_mfcc/$y $mfccdir || exit 1;
......
......@@ -49,8 +49,7 @@ idngram2lm -linear -idngram $lmdir/sprak.idngram -vocab \
test=data/lang_test_${lm_suffix}
mkdir -p $test
cp -rT data/lang $test
cp -r data/lang/* $test
cat $lmdir/sprak.arpa | \
utils/find_arpa_oovs.pl $test/words.txt > $lmdir/oovs_${lm_suffix}.txt
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment