Commit 15c5c3fd authored by Ning Ma's avatar Ning Ma
Browse files

(trunk/egs/chime1) added a kaldi recipe for the 1st CHiME challenge (GRID corpus)

git-svn-id: https://svn.code.sf.net/p/kaldi/code/trunk@5124 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
parent ce9a7a1c
This is a kaldi setup for 1st CHiME challenge. See
http://spandh.dcs.shef.ac.uk/projects/chime/challenge.html
for more detailed information.
The setup should also work for the GRID corpus and the 2nd CHiME challenge track 1
http://spandh.dcs.shef.ac.uk/gridcorpus/
http://spandh.dcs.shef.ac.uk/chime_challenge/chime2013/
Quick instructions:
1) download CHiME1 data
Check the download page http://spandh.dcs.shef.ac.uk/projects/chime/PCC/datasets.html
Train set
http://spandh.dcs.shef.ac.uk/projects/chime/PCC/data/PCCdata16kHz_train_reverberated.tar.gz
Devel set
http://spandh.dcs.shef.ac.uk/projects/chime/PCC/data/PCCdata16kHz_devel_isolated.tar.gz
Test set
http://spandh.dcs.shef.ac.uk/projects/chime/PCC/data/PCCdata16kHz_test_isolated.tar.gz
2) move to Kaldi CHiME1 directory, e.g.,
cd kaldi-trunk/egs/chime1/s5
3a) specify Kaldi directory in path.sh,
export KALDI_ROOT="<your kaldi directory>/kaldi-trunk"
3b) specify CHiME1 signal directory and CHiME1 recogniser directory for your
username ($USER) in config.sh.
By default, directories data/ exp/ mfcc/ will be created by the recipe in the
Kaldi CHiME1 recogniser directory. You could link these to directories on a
different disk space or specify a different directory in config.sh,
export WAV_ROOT="<your CHiME1 directory>/PCCdata16kHz"
export REC_ROOT="."
4) execute run.sh
./run.sh
4*) we suggest using the following command to save the main log file
nohup ./run.sh > run.log
5) You can find the results at exp/tri2b/decode_*/keyword_scores.txt
==== Devel set
Keyword (letter+digit) recognition accuracy (%)
-----------------------------------------------------------------
SNR -6dB -3dB 0dB 3dB 6dB 9dB Average
-----------------------------------------------------------------
Overall 38.00 42.75 54.08 64.50 75.17 83.92 59.74
-----------------------------------------------------------------
Letter 30.17 34.17 44.67 52.17 64.50 74.00 49.94
Digit 45.83 51.33 63.50 76.83 85.83 93.83 69.53
-----------------------------------------------------------------
==== Test set
Keyword (letter+digit) recognition accuracy (%)
-----------------------------------------------------------------
SNR -6dB -3dB 0dB 3dB 6dB 9dB Average
-----------------------------------------------------------------
Overall 36.25 40.92 53.33 64.58 74.92 83.92 58.99
-----------------------------------------------------------------
Letter 30.50 32.00 44.33 56.00 63.33 75.33 50.25
Digit 42.00 49.83 62.33 73.17 86.50 92.50 67.72
-----------------------------------------------------------------
# Job-submission commands used by the recipe. Exactly one group of
# train_cmd / decode_cmd / cuda_cmd / mkgraph_cmd exports should be active;
# the cluster-specific groups below are left commented out as templates.
#
# "queue.pl" uses qsub. The options to it are
# options to qsub. If you have GridEngine installed,
# change this to a queue you have access to.
# Otherwise, use "run.pl", which will run jobs locally
# (make sure your --num-jobs options are no more than
# the number of cpus on your machine).
#a) JHU cluster options
#export train_cmd="queue.pl -l arch=*64"
#export decode_cmd="queue.pl -l arch=*64,mem_free=2G,ram_free=2G"
#export mkgraph_cmd="queue.pl -l arch=*64,ram_free=4G,mem_free=4G"
#export cuda_cmd="..."
#b) BUT cluster options
#export train_cmd="queue.pl -q all.q@@blade -l ram_free=1200M,mem_free=1200M"
#export decode_cmd="queue.pl -q all.q@@blade -l ram_free=1700M,mem_free=1700M"
#export decodebig_cmd="queue.pl -q all.q@@blade -l ram_free=4G,mem_free=4G"
#export cuda_cmd="queue.pl -q long.q@@pco203 -l gpu=1"
#export cuda_cmd="queue.pl -q long.q@pcspeech-gpu"
#export mkgraph_cmd="queue.pl -q all.q@@servers -l ram_free=4G,mem_free=4G"
#c) USFD cluster options
#config="conf/queue_usfd.conf"
#export train_cmd="queue.pl --config $config --mem 8G --rmem 4G"
#export decode_cmd="queue.pl --config $config --mem 8G --rmem 4G"
#export mkgraph_cmd="queue.pl --config $config --mem 8G --rmem 4G"
#export cuda_cmd="queue.pl --config $config --mem 24G --rmem 20G --gpu 1 --time 24:00:00"
#d) run it locally...
# Default: every stage is run on the local machine through run.pl.
export train_cmd=run.pl
export decode_cmd=run.pl
export cuda_cmd=run.pl
export mkgraph_cmd=run.pl
--use-energy=false # only non-default option.
--sample-frequency=16000 # sampled at 16kHz
command qsub -v PATH -j y
option mem=* -l mem=$0,rmem=$0
option mem=0 # Do not add anything to qsub_opts
option rmem=* -l rmem=$0
option rmem=0 # Do not add anything to qsub_opts
option num_threads=* -pe openmp $0
option num_threads=1 # Do not add anything to qsub_opts
option max_jobs_run=* -tc $0
option time=* -l h_rt=$0
default gpu=0
option gpu=0
option gpu=* -l gpu=$0 -P gpu
<Topology>
<TopologyEntry>
<ForPhones>
NONSILENCEPHONES
</ForPhones>
<State> 0 <PdfClass> 0 <Transition> 0 0.75 <Transition> 1 0.25 </State>
<State> 1 <PdfClass> 1 <Transition> 1 0.75 <Transition> 2 0.25 </State>
<State> 2 <PdfClass> 2 <Transition> 2 0.75 <Transition> 3 0.25 </State>
<State> 3 </State>
</TopologyEntry>
<TopologyEntry>
<ForPhones>
SILENCEPHONES
</ForPhones>
<State> 0 <PdfClass> 0 <Transition> 0 0.25 <Transition> 1 0.25 <Transition> 2 0.25 <Transition> 3 0.25 </State>
<State> 1 <PdfClass> 1 <Transition> 1 0.25 <Transition> 2 0.25 <Transition> 3 0.25 <Transition> 4 0.25 </State>
<State> 2 <PdfClass> 2 <Transition> 1 0.25 <Transition> 2 0.25 <Transition> 3 0.25 <Transition> 4 0.25 </State>
<State> 3 <PdfClass> 3 <Transition> 1 0.25 <Transition> 2 0.25 <Transition> 3 0.25 <Transition> 4 0.25 </State>
<State> 4 <PdfClass> 4 <Transition> 4 0.25 <Transition> 5 0.75 </State>
<State> 5 </State>
</TopologyEntry>
</Topology>
# Per-user path configuration: exports WAV_ROOT (location of the unpacked
# CHiME wav files) and REC_ROOT (where data/ exp/ mfcc/ are written),
# selected by the login name in $USER. Add a branch for your own user name.
case "$USER" in
"ac1nmx")
# CHiME Challenge wav root (after unzipping)...
export WAV_ROOT="/data/ac1nmx/data/PCCdata16kHz"
# Used by the recogniser for storing data/ exp/ mfcc/ etc
export REC_ROOT="."
;;
*)
# Unknown user: only a reminder is printed; WAV_ROOT and REC_ROOT are left
# untouched (i.e. unset unless already present in the environment).
echo "Please define WAV_ROOT and REC_ROOT for user $USER"
;;
esac
BIN B IH N
LAY L EY
PLACE P L EY S
SET S EH T
RED R EH D
GREEN G R IY N
BLUE B L UW
WHITE W AY T
AT AE T
BY B AY
IN IH N
WITH W IH DH
A EY
B B IY
C S IY
D D IY
E IY
F EH F
G JH IY
H EY CH
I AY
J JH AX IY
K K EY
L EH L
M EH M
N EH N
O OW
P P IY
Q K Y UW
R AA
S EH S
T T IY
U Y UW
V V IY
X EH K S
Y W AY
Z Z EH D
ONE W AH N
TWO T UW
THREE TH R IY
FOUR F AO
FIVE F AY V
SIX S IH K S
SEVEN S EH V N
EIGHT EY T
NINE N AY N
ZERO Z IA R OW
AGAIN AX G EH N
NOW N AW
PLEASE P L IY Z
SOON S UW N
#!/bin/bash
# Copyright 2015 University of Sheffield (Author: Ning Ma)
# Apache 2.0.
#
# This script prepares the data/ directory for the CHiME/GRID corpus.
#
# For the train set (mandatory) and the devel/test sets (when their wav
# directories exist) it writes, under $REC_ROOT/data/<set>/:
#   wav.scp  - "<utterance-id><TAB><wav path>", one line per wav file
#   text     - transcriptions produced by local/create_chime1_trans.pl
#   utt2spk  - maps every utterance to itself (no speaker information used)
#   spk2utt  - inverse map produced by utils/utt2spk_to_spk2utt.pl
#
# Exits with status 1 if the configuration is incomplete, the train wav
# directory is missing, or the text/spk2utt files cannot be created.

. ./config.sh # Needed for REC_ROOT and WAV_ROOT

# config.sh only prints a reminder for unknown users; refuse to continue
# with empty roots, otherwise the paths below would resolve to /data etc.
if [ -z "$WAV_ROOT" ] || [ -z "$REC_ROOT" ]; then
  echo "WAV_ROOT and REC_ROOT must be defined in config.sh"
  exit 1;
fi

# Setup relevant folders
data="$REC_ROOT/data"
locdata="$data/local"
mkdir -p "$locdata"
utils="utils"

# Setup wav folders
wav_train="$WAV_ROOT/train/reverberated"
wav_devel="$WAV_ROOT/devel/isolated"
wav_test="$WAV_ROOT/test/isolated"

if [ ! -d "$wav_train" ]; then
  echo "Cannot find wav directory $wav_train"
  echo "Please download the CHiME Challenge Data from"
  echo " train set http://spandh.dcs.shef.ac.uk/projects/chime/PCC/data/PCCdata16kHz_train_reverberated.tar.gz"
  exit 1;
fi

# The train set is mandatory; devel and test are added only if present.
set_list="train"
mkdir -p "$data/train"
if [ -d "$wav_devel" ]; then
  set_list="$set_list devel"
  mkdir -p "$data/devel"
fi
if [ -d "$wav_test" ]; then
  set_list="$set_list test"
  mkdir -p "$data/test"
fi
echo "Preparing data sets: $set_list"

# Create scp files
# Train wavs live in <wav_train>/id<N>/<utt>.wav; the utterance id becomes
# s<NN>_<utt> with a zero-padded speaker number.
scp="$data/train/wav.scp"
rm -f "$scp"
for sid in $(seq 34); do
  sid2=$(printf "s%02d" "$sid")
  ls -1 "$wav_train/id$sid"/*.wav \
    | sed "s/\(.*\)\/\(.*\).wav/${sid2}_\2\t\1\/\2.wav/" \
    | sort >> "$scp"
done

# Devel/test wavs live in <wav_dir>/<snr>/s<N>_<utt>.wav; the utterance id
# becomes s<NN>_<utt>_<snr>.
for x in "devel" "test"; do
  wav_var="wav_$x"
  wav_dir="${!wav_var}"
  # Test the wav directory itself rather than $data/$x, so that a stale data
  # directory from an earlier run does not trigger a listing of missing wavs.
  if [ -d "$wav_dir" ]; then
    scp="$data/$x/wav.scp"
    rm -f "$scp"
    for sid in $(seq 34); do
      sid2=$(printf "s%02d" "$sid")
      ls -1 "$wav_dir"/*/s"${sid}"_*.wav \
        | sed "s/\(.*\)\/\(.*\)\/s.*_\(.*\).wav/${sid2}_\3_\2\t\1\/\2\/s${sid}_\3.wav/" \
        | sort >> "$scp"
    done
  fi
done

# Prepare other files in data/setname/
for x in $set_list; do
  scp="$data/$x/wav.scp"
  if [ -f "$scp" ]; then
    # Create transcription files
    cut -f1 "$scp" | local/create_chime1_trans.pl - > "$data/$x/text" || exit 1;
    # Create utt2spk files
    # No speaker ID
    sed 's/\(.*\)\t.*/\1\t\1/' < "$scp" > "$data/$x/utt2spk"
    # Use speaker ID
    #sed "s/\(s..\)\(.*\)\t.*/\1\2\t\1/" < "$scp" > "$data/$x/utt2spk"
    # Create spk2utt files
    cat "$data/$x/utt2spk" | $utils/utt2spk_to_spk2utt.pl > "$data/$x/spk2utt" || exit 1;
  fi
done

echo "--> Data preparation succeeded"
exit 0
#!/bin/bash
# Copyright 2015 University of Sheffield (Author: Ning Ma)
# Apache 2.0.
#
# Kaldi scripts for preparing dictionary for the GRID corpus (or CHiME 1)
#
# Copies input/lexicon.txt into $REC_ROOT/data/local/dict and derives from it
# phone.list, nonsilence_phones.txt, silence_phones.txt,
# optional_silence.txt and an empty extra_questions.txt.
# Exits with status 1 if the lexicon is missing or cannot be copied.

echo "Preparing dictionary"

. ./config.sh # Needed for REC_ROOT and WAV_ROOT

# Prepare relevant folders
dict="$REC_ROOT/data/local/dict"
mkdir -p "$dict" || exit 1

# Copy lexicon
lexicon="input/lexicon.txt" # phone models
if [ ! -f "$lexicon" ]; then
  echo "Cannot find lexicon $lexicon"
  exit 1
fi
cp "$lexicon" "$dict/lexicon.txt" || exit 1

# Generate phone list: every distinct symbol after the first column of the
# lexicon, followed by the silence phone.
sil="SIL"
phone_list="$dict/phone.list"
awk '{for (n=2;n<=NF;n++)print $n;}' "$lexicon" | sort -u > "$phone_list"
echo "$sil" >> "$phone_list"

# Create phone lists
grep -v -w "$sil" "$phone_list" > "$dict/nonsilence_phones.txt"
echo "$sil" > "$dict/silence_phones.txt"
echo "$sil" > "$dict/optional_silence.txt"

# list of "extra questions"-- empty; we don't have things like tone or
# word-positions or stress markings.
touch "$dict/extra_questions.txt"

echo "-->Dictionary preparation succeeded"
exit 0
#!/bin/bash
# Copyright 2015 University of Sheffield (Author: Ning Ma)
# Apache 2.0.
#
# Scripts for preparing grammar for the GRID corpus (or CHiME 1)
#
# Pipes the text FST printed by local/create_chime1_grammar.pl through
# fstcompile and fstarcsort, and writes the result to
# $REC_ROOT/data/lang/G.fst. Requires $REC_ROOT/data/lang/words.txt.
# Exits with status 1 if the symbol table is missing or any pipeline stage
# fails.

# Without pipefail, "|| exit 1" below would only see the status of the last
# stage (fstarcsort) and miss a failing grammar generator or fstcompile.
set -o pipefail

echo "Preparing grammar for test"

. ./config.sh # Needed for REC_ROOT and WAV_ROOT

# Setup relevant folders
lang="$REC_ROOT/data/lang"

if [ ! -f "$lang/words.txt" ]; then
  echo "Cannot find word symbol table $lang/words.txt"
  exit 1
fi

# Create FST grammar for the GRID
grammar_cmd="local/create_chime1_grammar.pl"
$grammar_cmd | fstcompile --isymbols="$lang/words.txt" --osymbols="$lang/words.txt" \
  --keep_isymbols=false --keep_osymbols=false | fstarcsort --sort_type=ilabel \
  > "$lang/G.fst" || exit 1

# Draw the FST
#echo "fstdraw --isymbols=$lang/words.txt --osymbols=$lang/words.txt $lang/G.fst | dot -Tps > local/G.ps"

echo "--> Grammar preparation succeeded"
exit 0
#!/usr/bin/env perl
# Copyright 2015 University of Sheffield (Author: Ning Ma)
# Apache 2.0.
#
# Computes keyword recognition accuracy (letter+digit) for the CHiME/GRID
# corpus from a transcription file containing:
# s01_bgaa9a_0dB BIN GREEN IN R NINE AGAIN
# s01_bgaa9a_3dB BIN GREEN AT A NINE AGAIN
#
# The reference letter and digit are taken from characters 4 and 5 of the
# second "_"-separated field of the utterance id; the SNR label is the third
# field. They are compared with the 4th and 5th recognised words.
#
# Usage: compute_chime1_scores.pl exp/tri1/decode_devel/scoring/trans.txt
#        (use "-" to read the transcriptions from standard input)
#
# Blank lines are ignored. Lines whose utterance id carries no usable
# reference or SNR field are skipped and reported once on stderr. An SNR
# condition without any utterance is printed as "n/a" and left out of the
# average instead of aborting with a division by zero.
#
use strict;
use warnings;

@ARGV >= 1 or die "Usage: $0 <trans.txt>\n";
my $in_list = $ARGV[0];

# Three-argument open so the file name is never interpreted as a mode or a
# command; "-" is still honoured as standard input, as two-argument open did.
my $info;
if ($in_list eq "-") {
    $info = \*STDIN;
}
else {
    open $info, '<', $in_list or die "could not open $in_list: $!";
}

my @all_snrs = ("m6dB", "m3dB", "0dB", "3dB", "6dB", "9dB");

# Start every reported SNR at zero so that conditions with no correct
# keyword (or no utterance at all) do not yield undefined values later.
my %snr_scores_letter = map { $_ => 0 } @all_snrs;
my %snr_scores_digit  = map { $_ => 0 } @all_snrs;
my %snr_count         = map { $_ => 0 } @all_snrs;

# Digit character used in the utterance id -> word used in transcriptions.
my %digit_word = (
    "z" => "ZERO",  "1" => "ONE", "2" => "TWO",   "3" => "THREE",
    "4" => "FOUR",  "5" => "FIVE", "6" => "SIX",  "7" => "SEVEN",
    "8" => "EIGHT", "9" => "NINE",
);

my $num_skipped = 0;
while (my $line = <$info>) {
    chomp($line);
    my @words = split ' ', $line;
    next unless @words;    # blank line

    my @tokens = split /_/, $words[0];
    my $ref = $tokens[1];
    my $snr = $tokens[2];
    if (!defined $ref || length($ref) < 5 || !defined $snr) {
        $num_skipped++;
        next;
    }

    # Extract letter and digit
    my $letter = uc(substr($ref, 3, 1));
    my $digit = substr($ref, 4, 1);
    $digit = $digit_word{$digit} if exists $digit_word{$digit};

    # Compute score
    my $nwords = scalar @words;
    if (($nwords > 4) && ($letter eq $words[4])) { $snr_scores_letter{$snr}++; }
    if (($nwords > 5) && ($digit eq $words[5])) { $snr_scores_digit{$snr}++; }
    $snr_count{$snr}++;
}
close $info;

if ($num_skipped > 0) {
    warn "$0: skipped $num_skipped line(s) without a usable reference/SNR in the utterance id\n";
}

# Prints one table row: the label, one percentage per SNR condition and the
# mean over the conditions that have data. $score_for is a code reference
# returning the percentage for an SNR label, or undef when it has no data.
sub print_score_row {
    my ($label, $snrs, $score_for) = @_;
    printf "%-10s", $label;
    my $score_sum = 0;
    my $num_scored = 0;
    foreach my $snr (@$snrs) {
        my $score = $score_for->($snr);
        if (defined $score) {
            $score_sum += $score;
            $num_scored++;
            printf "%-8.2f", $score;
        }
        else {
            printf "%-8s", "n/a";
        }
    }
    if ($num_scored > 0) {
        printf "%-8.2f", $score_sum / $num_scored;
    }
    else {
        printf "%-8s", "n/a";
    }
    return;
}

my $rule = "-----------------------------------------------------------------";

# Print out keyword accuracies
print "\nKeyword (letter+digit) recognition accuracy (%)\n";
print "$rule\n";
printf "%-10s", "SNR";
foreach my $label (@all_snrs) {
    my $snr = $label;
    $snr =~ s/m/-/;    # "m6dB" is displayed as "-6dB"
    printf "%-8s", $snr;
}
printf "%-8s", "Average";
print "\n$rule\n";

print_score_row("Overall", \@all_snrs, sub {
    my ($snr) = @_;
    return unless $snr_count{$snr};
    return ($snr_scores_letter{$snr} + $snr_scores_digit{$snr}) / 2 / $snr_count{$snr} * 100;
});
print "\n$rule\n";

print_score_row("Letter", \@all_snrs, sub {
    my ($snr) = @_;
    return unless $snr_count{$snr};
    return $snr_scores_letter{$snr} / $snr_count{$snr} * 100;
});
print "\n";

print_score_row("Digit", \@all_snrs, sub {
    my ($snr) = @_;
    return unless $snr_count{$snr};
    return $snr_scores_digit{$snr} / $snr_count{$snr} * 100;
});
print "\n$rule\n";
#!/usr/bin/env perl
#
# Copyright 2015 University of Sheffield (Author: Ning Ma)
# Apache 2.0.
#
# Prepare a simple grammar G.fst for the GRID corpus (CHiME 1/2).
# The grammar is printed to standard output in text FST form, one arc per
# line: "<from-state> <to-state> <input> <output> <cost>".
#
use strict;
use warnings;

# GRID has the following grammar:
# verb=bin|lay|place|set
# colour=blue|green|red|white
# prep=at|by|in|with
# letter=a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|x|y|z
# digit=zero|one|two|three|four|five|six|seven|eight|nine
# coda=again|now|please|soon
# $verb $colour $prep $letter $digit $coda
#
# Note: no explicit silence arcs are emitted at the start or the end of the
# utterance; only the six word classes below appear in the output.
my @word_classes = (
    [ "BIN", "LAY", "PLACE", "SET" ],
    [ "BLUE", "GREEN", "RED", "WHITE" ],
    [ "AT", "BY", "IN", "WITH" ],
    [ "A" .. "V", "X", "Y", "Z" ],
    [ "ZERO", "ONE", "TWO", "THREE", "FOUR", "FIVE", "SIX", "SEVEN", "EIGHT", "NINE" ],
    [ "AGAIN", "NOW", "PLEASE", "SOON" ],
);

# Each word class links state i to state i+1; all words of a class share the
# same cost, -log(1/N) for a class of N words (a uniform choice).
my $from = 0;
for my $class (@word_classes) {
    my $to = $from + 1;
    my $cost = -log(1.0 / scalar(@$class));
    for my $word (@$class) {
        print "$from $to $word $word $cost\n";
    }
    $from = $to;
}

# The state reached after the last word class is the final state.
print "$from 0.0\n";
#!/usr/bin/env perl
#
# Copyright 2015 University of Sheffield (Author: Ning Ma)
# Apache 2.0.
#
# Create transcriptions for the CHIME/GRID corpus from a list of
# file names (used as UTTERANCE-ID, e.g. s1_bgab3n)
# It outputs lines containing UTTERANCE-ID TRANSCRIPTIONS, e.g.
# s1_bgab3n BIN GREEN AT B THREE NOW
#
# Usage: create_chime1_trans.pl train.flist
use strict;
use warnings;
# Define silence label at begin/end of an utterance
my $sil = "<SIL>";
my $in_list = $ARGV[0];
open my $info, $in_list or die "could not open $in_list: $!";
while (my $line = <$info>) {
chomp($line);
$line =~ s/\.[^.]+$//; # Remove extension just in case
my @tokens = split("_", $line);
my @chars = split("", $tokens[1]);
my $trans;
if ($chars[0] eq "b") { $trans = "BIN"}
elsif ($chars[0] eq "l") { $trans = "LAY" }
elsif ($chars[0] eq "p") { $trans = "PLACE" }
elsif ($chars[0] eq "s") { $trans = "SET" }
else { $trans = "!UNKNOWN"}
if ($chars[1] eq "b") { $trans = $trans . " BLUE" }
elsif ($chars[1] eq "g") { $trans = $trans . " GREEN" }
elsif ($chars[1] eq "r") { $trans = $trans . " RED" }
elsif ($chars[1] eq "w") { $trans = $trans . " WHITE" }
else { $trans = $trans . "!UNKNOWN"}
if ($chars[2] eq "a") { $trans = $trans . " AT" }
elsif ($chars[2] eq "b") { $trans = $trans . " BY" }
elsif ($chars[2] eq "i") { $trans = $trans . " IN" }
elsif ($chars[2] eq "w") { $trans = $trans . " WITH" }
else { $trans = $trans . "!UNKNOWN"}
$trans = $trans . " " . uc($chars[3]);
if ($chars[4] eq "z") { $trans = $trans . " ZERO" }
elsif ($chars[4] eq "1") { $trans = $trans . " ONE" }
elsif ($chars[4] eq "2") { $trans = $trans . " TWO" }
elsif ($chars[4] eq "3") { $trans = $trans . " THREE" }
elsif ($chars[4] eq "4") { $trans = $trans . " FOUR" }
elsif ($chars[4] eq "5") { $trans = $trans . " FIVE" }
elsif ($chars[4] eq "6") { $trans = $trans . " SIX" }
elsif ($chars[4] eq "7") { $trans = $trans . " SEVEN" }
elsif ($chars[4] eq "8") { $trans = $trans . " EIGHT" }
elsif ($chars[4] eq "9") { $trans = $trans . " NINE" }