Commit 732d2833 authored by Dan Povey's avatar Dan Povey
Browse files

trunk: various mostly cosmetic fixes.

git-svn-id: https://svn.code.sf.net/p/kaldi/code/trunk@4362 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
parent 7b84e0d3
......@@ -118,4 +118,5 @@ utils/fix_data_dir.sh $dest
if [ $(wc -l < $dest/wav.scp) -ne 80 ]; then
echo "$0: error: expected 80 lines in wav.scp, got $(wc -l < $dest/wav.scp)"
exit 1;
fi
......@@ -25,9 +25,9 @@ if [ $# != 3 ]; then
echo "usage: make_fbank_pitch.sh [options] <data-dir> <log-dir> <path-to-fbank-pitch-dir>";
echo "options: "
echo " --fbank-config <config-file> # config passed to compute-fbank-feats "
echo " --pitch_config <pitch-config-file> # config passed to compute-kaldi-pitch-feats "
echo " --pitch_postprocess_config <postprocess-config-file> # config passed to process-kaldi-pitch-feats "
echo " --paste_length_tolerance <tolerance> # length tolerance passed to paste-feats"
echo " --pitch-config <pitch-config-file> # config passed to compute-kaldi-pitch-feats "
echo " --pitch-postprocess-config <postprocess-config-file> # config passed to process-kaldi-pitch-feats "
echo " --paste-length-tolerance <tolerance> # length tolerance passed to paste-feats"
echo " --nj <nj> # number of parallel jobs"
echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
exit 1;
......
......@@ -24,10 +24,10 @@ if [ -f path.sh ]; then . ./path.sh; fi
if [ $# != 3 ]; then
echo "usage: make_mfcc_pitch.sh [options] <data-dir> <log-dir> <path-to-mfcc-pitch-dir>";
echo "options: "
echo " --mfcc_config <mfcc-config-file> # config passed to compute-mfcc-feats "
echo " --pitch_config <pitch-config-file> # config passed to compute-kaldi-pitch-feats "
echo " --pitch_postprocess_config <postprocess-config-file> # config passed to process-kaldi-pitch-feats "
echo " --paste_length_tolerance <tolerance> # length tolerance passed to paste-feats"
echo " --mfcc-config <mfcc-config-file> # config passed to compute-mfcc-feats "
echo " --pitch-config <pitch-config-file> # config passed to compute-kaldi-pitch-feats "
echo " --pitch-postprocess-config <postprocess-config-file> # config passed to process-kaldi-pitch-feats "
echo " --paste-length-tolerance <tolerance> # length tolerance passed to paste-feats"
echo " --nj <nj> # number of parallel jobs"
echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
exit 1;
......
#!/bin/bash
# Copyright 2013 The Shenzhen Key Laboratory of Intelligent Media and Speech,
# PKU-HKUST Shenzhen Hong Kong Institution (Author: Wei Shi)
# 2014 Johns Hopkins University (Author: Daniel Povey)
# Apache 2.0
# Combine MFCC and online-pitch features together
# Note: This file is based on make_mfcc_pitch.sh
# Begin configuration section.
# Defaults for the options documented in the usage message below.  They are
# assigned before parse_options.sh is sourced so that the command line can
# replace them.
nj=4                                        # number of parallel jobs
cmd=run.pl                                  # job launcher, see --cmd below
mfcc_config=conf/mfcc.conf                  # --config for compute-mfcc-feats
online_pitch_config=conf/online_pitch.conf  # --config for compute-and-process-kaldi-pitch-feats
paste_length_tolerance=2                    # --length-tolerance for paste-feats
compress=true                               # --compress for copy-feats
# End configuration section.

echo "$0 $*"  # Print the command line for logging

if [ -f path.sh ]; then . ./path.sh; fi
. parse_options.sh || exit 1;

if [ $# -ne 3 ]; then
  # Report the name this script was actually invoked as; the text used to
  # name make_mfcc_pitch.sh, the script this one was derived from.
  echo "usage: $0 [options] <data-dir> <log-dir> <path-to-mfcc-pitch-dir>";
  echo "options: "
  echo " --mfcc-config <mfcc-config-file> # config passed to compute-mfcc-feats, default "
  echo " # is conf/mfcc.conf"
  echo " --online-pitch-config <online-pitch-config-file> # config passed to compute-and-process-kaldi-pitch-feats, "
  echo " # default is conf/online_pitch.conf"
  echo " --paste-length-tolerance <tolerance> # length tolerance passed to paste-feats"
  echo " --nj <nj> # number of parallel jobs"
  echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
  exit 1;
fi
# Positional arguments.
data=$1            # data directory; must contain wav.scp
logdir=$2          # where per-job logs and the temporary split lists go
mfcc_pitch_dir=$3  # where the feature archives (.ark/.scp) are written

# Make $mfcc_pitch_dir an absolute pathname: it is embedded in the .scp
# files written by copy-feats further down.
case $mfcc_pitch_dir in
  /*) ;;
  *) mfcc_pitch_dir=$PWD/$mfcc_pitch_dir ;;
esac

# use "name" as part of name of the archive.
name=$(basename "$data")

mkdir -p "$mfcc_pitch_dir" || exit 1;
mkdir -p "$logdir" || exit 1;

# Keep a copy of any previous feats.scp before it is regenerated.
if [ -f "$data/feats.scp" ]; then
  mkdir -p "$data/.backup"
  echo "$0: moving $data/feats.scp to $data/.backup"
  mv "$data/feats.scp" "$data/.backup"
fi

scp=$data/wav.scp

# All of these inputs must exist before any job is launched.
for f in "$scp" "$mfcc_config" "$online_pitch_config"; do
  if [ ! -f "$f" ]; then
    echo "$0: no such file $f"
    exit 1;
  fi
done

utils/validate_data_dir.sh --no-text --no-feats "$data" || exit 1;

# Start from an empty value so that a vtln_opts variable inherited from the
# caller's environment cannot end up on the compute-mfcc-feats command line.
vtln_opts=
if [ -f "$data/spk2warp" ]; then
  echo "$0 [info]: using VTLN warp factors from $data/spk2warp"
  vtln_opts="--vtln-map=ark:$data/spk2warp --utt2spk=ark:$data/utt2spk"
elif [ -f "$data/utt2warp" ]; then
  echo "$0 [info]: using VTLN warp factors from $data/utt2warp"
  vtln_opts="--vtln-map=ark:$data/utt2warp"
fi
for ((n = 1; n <= nj; n++)); do
  # the next command does nothing unless $mfcc_pitch_dir/storage/ exists, see
  # utils/create_data_link.pl for more info.
  utils/create_data_link.pl "$mfcc_pitch_dir/raw_mfcc_online_pitch_$name.$n.ark"
done

# Remove a stale error marker left by an earlier run.  The marker that is
# tested after feature extraction is .error.$name, so that is the file to
# clear, and it has to be cleared whether or not a segments file exists.
rm -f "$logdir/.error.$name"

# Each branch splits the input list into $nj pieces and builds the two
# rspecifiers for paste-feats.  The literal string JOB is replaced by the job
# number by $cmd.
if [ -f "$data/segments" ]; then
  echo "$0 [info]: segments file exists: using that."
  split_segments=()
  for ((n = 1; n <= nj; n++)); do
    split_segments+=("$logdir/segments.$n")
  done

  utils/split_scp.pl "$data/segments" "${split_segments[@]}" || exit 1;

  mfcc_feats="ark:extract-segments scp,p:$scp $logdir/segments.JOB ark:- | compute-mfcc-feats $vtln_opts --verbose=2 --config=$mfcc_config ark:- ark:- |"
  pitch_feats="ark,s,cs:extract-segments scp,p:$scp $logdir/segments.JOB ark:- | compute-and-process-kaldi-pitch-feats --verbose=2 --config=$online_pitch_config ark:- ark:- |"
else
  echo "$0: [info]: no segments file exists: assuming wav.scp indexed by utterance."
  split_scps=()
  for ((n = 1; n <= nj; n++)); do
    split_scps+=("$logdir/wav_${name}.$n.scp")
  done

  utils/split_scp.pl "$scp" "${split_scps[@]}" || exit 1;

  mfcc_feats="ark:compute-mfcc-feats $vtln_opts --verbose=2 --config=$mfcc_config scp,p:$logdir/wav_${name}.JOB.scp ark:- |"
  pitch_feats="ark,s,cs:compute-and-process-kaldi-pitch-feats --verbose=2 --config=$online_pitch_config scp,p:$logdir/wav_${name}.JOB.scp ark:- |"
fi

# Paste MFCC and pitch features frame by frame and write one ark/scp pair per
# job.  $cmd is deliberately left unquoted: it may carry launcher options
# (see --cmd in the usage message) that must be split into separate words.
$cmd JOB=1:$nj "$logdir/make_mfcc_pitch_${name}.JOB.log" \
  paste-feats --length-tolerance=$paste_length_tolerance "$mfcc_feats" "$pitch_feats" ark:- \| \
  copy-feats --compress=$compress ark:- \
  ark,scp:$mfcc_pitch_dir/raw_mfcc_online_pitch_$name.JOB.ark,$mfcc_pitch_dir/raw_mfcc_online_pitch_$name.JOB.scp \
  || exit 1;
# Abort if an error marker for this data set is present, showing the tail of
# the first job's log as a hint.
if [ -f "$logdir/.error.$name" ]; then
  echo "Error producing mfcc & pitch features for $name:"
  tail "$logdir/make_mfcc_pitch_${name}.1.log"
  exit 1;
fi

# concatenate the .scp files together.
for ((n = 1; n <= nj; n++)); do
  cat "$mfcc_pitch_dir/raw_mfcc_online_pitch_$name.$n.scp" || exit 1;
done > "$data/feats.scp"

# The per-job input lists are no longer needed.  Only one of the two patterns
# matches, depending on whether a segments file was used, so errors about the
# unmatched one are suppressed.
rm -f "$logdir"/wav_"${name}".*.scp "$logdir"/segments.* 2>/dev/null

# Compare the number of feature entries with the number of utterances.
# Wrapping wc in $(( )) normalises its output to a plain integer; an
# unreadable file counts as 0.
nf=$(( $(wc -l < "$data/feats.scp") ))
nu=$(( $(wc -l < "$data/utt2spk") ))

if (( nf != nu )); then
  echo "It seems not all of the feature files were successfully processed ($nf != $nu);"
  echo "consider using utils/fix_data_dir.sh $data"
fi

# Losing more than 5% of the utterances (integer arithmetic) is fatal.
if (( nf < nu - nu / 20 )); then
  echo "Less than 95% the features were successfully generated. Probably a serious error."
  exit 1;
fi

echo "Succeeded creating MFCC & online-pitch features for $name"
......@@ -25,9 +25,9 @@ if [ $# != 3 ]; then
echo "usage: make_plp_pitch.sh [options] <data-dir> <log-dir> <path-to-plp-pitch-dir>";
echo "options: "
echo " --plp-config <config-file> # config passed to compute-plp-feats "
echo " --pitch_config <pitch-config-file> # config passed to compute-kaldi-pitch-feats "
echo " --pitch_postprocess_config <postprocess-config-file> # config passed to process-kaldi-pitch-feats "
echo " --paste_length_tolerance <tolerance> # length tolerance passed to paste-feats"
echo " --pitch-config <pitch-config-file> # config passed to compute-kaldi-pitch-feats "
echo " --pitch-postprocess-config <postprocess-config-file> # config passed to process-kaldi-pitch-feats "
echo " --paste-length-tolerance <tolerance> # length tolerance passed to paste-feats"
echo " --nj <nj> # number of parallel jobs"
echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
exit 1;
......
......@@ -36,9 +36,14 @@
The <a href=https://sourceforge.net/projects/kaldi/forums> Kaldi forums on Sourceforge </a> are
a good place to ask questions.
For the slides for a short series of Kaldi lectures that were delivered in 2012,
see <a href=https://sites.google.com/site/dpovey/kaldi-lectures> here </a>.
The site <a href=http://kaldi-asr.org> kaldi-asr.org </a> was created in August 2014 to host the Kaldi model-build
directories that were built with the example scripts. This will be useful as a reference for
debugging and for those who want access to the built models. With time it will contain an increasingly
large fraction of the outputs of the example scripts in the repository.
For the slides for a short series of Kaldi lectures that were delivered in 2012,
see <a href=https://sites.google.com/site/dpovey/kaldi-lectures> here </a>. Note:
these are increasingly out of date.
<p>
The slides for the presentation that was made at the ICASSP meeting in Prague in 2011 are here:
......
......@@ -190,7 +190,7 @@ struct PitchExtractionOptions {
po->Register("snip-edges", &snip_edges, "If this is set to false, the "
"incomplete frames near the ending edge won't be snipped, so "
"that the number of frames is the file size divided by the "
"frame-shift. This makes different types of features give the"
"frame-shift. This makes different types of features give the "
"same number of frames.");
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment