Commit 512cead2 authored by Karel Vesely's avatar Karel Vesely
Browse files

trunk,s5/utils : when merging data-dirs, allow passing an extra list of filenames to be merged



git-svn-id: https://svn.code.sf.net/p/kaldi/code/trunk@2869 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
parent 7b3d82ad
#!/bin/bash #!/bin/bash
# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0. # Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0.
# This script operates on a data directory, such as in data/train/. # This script operates on a data directory, such as in data/train/.
# See http://kaldi.sourceforge.net/data_prep.html#data_prep_data # See http://kaldi.sourceforge.net/data_prep.html#data_prep_data
# for what these directories contain. # for what these directories contain.
# Begin configuration section.
extra_files= # specify additional files in 'src-data-dir' to merge, e.g. "file1 file2 ..."
# End configuration section.
echo "$0 $@" # Print the command line for logging
if [ -f path.sh ]; then . ./path.sh; fi
. parse_options.sh || exit 1;
if [ $# -lt 2 ]; then if [ $# -lt 2 ]; then
echo "Usage: combine_data.sh <dest-data-dir> <src-data-dir1> <src-data-dir2> ..." echo "Usage: combine_data.sh [--extra-files 'file1 file2'] <dest-data-dir> <src-data-dir1> <src-data-dir2> ..."
exit 1 exit 1
fi fi
...@@ -21,7 +28,7 @@ mkdir -p $dest; ...@@ -21,7 +28,7 @@ mkdir -p $dest;
export LC_ALL=C export LC_ALL=C
for file in utt2spk feats.scp text cmvn.scp segments reco2file_and_channel wav.scp; do for file in utt2spk feats.scp text cmvn.scp segments reco2file_and_channel wav.scp $extra_files; do
if [ -f $first_src/$file ]; then if [ -f $first_src/$file ]; then
( for f in $*; do cat $f/$file; done ) | sort -k1 > $dest/$file || exit 1; ( for f in $*; do cat $f/$file; done ) | sort -k1 > $dest/$file || exit 1;
echo "$0: combined $file" echo "$0: combined $file"
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment