Commit 9cb4c4c2 authored by David Snyder
Browse files

trunk: Adding GMM-based speaker recognition example for NIST SRE2010 in egs/sre10

git-svn-id: https://svn.code.sf.net/p/kaldi/code/trunk@5205 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
parent 1d89d81a
This directory (sre10) contains example scripts for the NIST SRE 2010
speaker recognition evaluation. The following corpora are required:
NIST SRE 2010 training set
NIST SRE 2010 test set
More details on NIST SRE 2010 can be found at the url
http://www.itl.nist.gov/iad/mig/tests/sre/2010/. Additional data sources
are required by the subdirectories. See the corresponding README.txt files
in the subdirectories for more details.
The subdirectories "v1" and so on are different versions of the recipe;
we don't call them "s1" etc., because they don't really correspond to
the speech recognition recipes.
Data required for system development (on top of the data for testing described
in ../README.txt).
Corpus LDC Catalog No.
SWBD2 Phase 2 LDC99S79
SWBD2 Phase 3 LDC2002S06
SWBD Cellular 1 LDC2001S13
SWBD Cellular 2 LDC2004S07
SRE2004 LDC2006S44
SRE2005 Train LDC2011S01
SRE2005 Test LDC2011S04
SRE2006 Train LDC2011S09
SRE2006 Test 1 LDC2011S10
SRE2006 Test 2 LDC2012S01
SRE2008 Train LDC2011S05
SRE2008 Test LDC2011S08
# Job-dispatch commands exported for the recipe scripts
# (train_cmd, decode_cmd, mkgraph_cmd, cuda_cmd).
#
# "queue.pl" uses qsub. The options to it are
# options to qsub. If you have GridEngine installed,
# change this to a queue you have access to.
# Otherwise, use "run.pl", which will run jobs locally
# (make sure your --num-jobs options are no more than
# the number of cpus on your machine).

#a) JHU cluster options
export train_cmd="queue.pl -l arch=*64*"
export decode_cmd="queue.pl -l arch=*64* -l ram_free=4G,mem_free=4G"
#export cuda_cmd="..."
# The memory request needs its own -l flag (as in decode_cmd above); without
# it "ram_free=4G,mem_free=4G" is a stray word instead of a qsub resource list.
export mkgraph_cmd="queue.pl -l arch=*64* -l ram_free=4G,mem_free=4G"

#b) BUT cluster options
#export train_cmd="queue.pl -q all.q@@blade -l ram_free=1200M,mem_free=1200M"
#export decode_cmd="queue.pl -q all.q@@blade -l ram_free=1700M,mem_free=1700M"
#export decodebig_cmd="queue.pl -q all.q@@blade -l ram_free=4G,mem_free=4G"
#export cuda_cmd="queue.pl -q long.q@@pco203 -l gpu=1"
#export cuda_cmd="queue.pl -q long.q@pcspeech-gpu"
#export mkgraph_cmd="queue.pl -q all.q@@servers -l ram_free=4G,mem_free=4G"

#c) run it locally...
#export train_cmd=run.pl
#export decode_cmd=run.pl
# Note: cuda_cmd is the only command that is set to run locally by default.
export cuda_cmd=run.pl
#export mkgraph_cmd=run.pl
# Feature-extraction options, one option per line.
# NOTE(review): the option names (--num-ceps, --low-freq, --high-freq) suggest
# these are MFCC settings for 8 kHz audio; confirm against the script that
# passes this file.
--sample-frequency=8000
--frame-length=25 # the default is 25
--low-freq=20 # the default.
--high-freq=3700 # the default is zero meaning use the Nyquist (4k in this case).
--num-ceps=20 # higher than the default which is 12.
--snip-edges=false
# Energy-based voice-activity-detection options, one option per line.
# NOTE(review): the tool that reads these options is not visible here; confirm
# the exact meaning of the two values against its documentation.
--vad-energy-threshold=5.5
--vad-energy-mean-scale=0.5
#!/bin/bash
# Copyright 2015 David Snyder
# Apache 2.0.
#
# This script scores the trials in <trials-file> by computing dot products
# between the enrollment speaker i-vectors (<enroll-ivec-dir>/spk_ivector.scp)
# and the test i-vectors (<test-ivec-dir>/ivector.scp). No LDA is trained here.
# NOTE(review): the output is named "cosine_scores"; the dot product equals the
# cosine only if the i-vectors were length-normalized beforehand - confirm.
#
# Arguments:
#   <enroll-data-dir> <test-data-dir>   accepted for interface compatibility,
#                                       not read by this script
#   <enroll-ivec-dir> <test-ivec-dir>   directories holding the i-vector scp files
#   <trials-file>                       first two columns of each line are used
#   <scores-dir>                        created if needed; receives cosine_scores

echo "$0 $@" # Print the command line for logging

if [ -f path.sh ]; then . ./path.sh; fi
. parse_options.sh || exit 1;

if [ $# != 6 ]; then
  echo "Usage: $0 <enroll-data-dir> <test-data-dir> <enroll-ivec-dir> <test-ivec-dir> <trials-file> <scores-dir>"
  # Stop here: continuing with missing arguments would run the scoring
  # pipeline on empty paths.
  exit 1
fi

enroll_data_dir=$1
test_data_dir=$2
enroll_ivec_dir=$3
test_ivec_dir=$4
trials=$5
scores_dir=$6

mkdir -p "$scores_dir" || exit 1

# Only the first two trial columns are passed on to the scoring binary.
awk '{print $1, $2}' < "$trials" | \
  ivector-compute-dot-products - \
    "scp:${enroll_ivec_dir}/spk_ivector.scp" \
    "scp:${test_ivec_dir}/ivector.scp" \
    "$scores_dir/cosine_scores"
#!/bin/bash
# Example only: shows how per-condition score files for plotting DET curves
# with DETware can be produced.
# A file called "foo" containing the scores is expected in the current
# directory.
# The matlab commands listed in the comments at the end are NOT executed by
# this script; they have to be typed in manually.

trials=data/sre08_trials/short2-short3-female.trials

# matlab_vector <label> <condition>
# Pastes the third column of "foo" in front of the trials file, keeps the
# lines whose flag column for <condition> is "Y" and whose trial type is
# <label> ("target" or "nontarget"), and prints the kept scores as a matlab
# vector named <label>.
matlab_vector() {
  local label=$1 cond=$2
  awk '{print $3}' foo | paste - "$trials" \
    | awk -v c="$cond" '{n=4+c; if ($n == "Y") print $1, $4}' \
    | grep -w "$label" \
    | awk -v name="$label" 'BEGIN {printf( "%s = [ ", name );} {print $1} END{printf("];\n");}'
}

# see http://www.itl.nist.gov/iad/mig/tests/sre/2008/official_results/
# for interpretation of the conditions, e.g. 1 = "Interview train and test".
# Condition 6 is "telephone train and test".
for ((condition = 1; condition <= 8; condition++)); do
  {
    matlab_vector target "$condition"
    matlab_vector nontarget "$condition"
  } > ~/DETware_v2.1/scores${condition}.m
done

# Note: the DETware_v2.1 directory is as extracted from the DETware package,
# which I got at.
# http://www.itl.nist.gov/iad/mig//tools/DETware_v2.1.targz.htm
# cd ~/DETware_v2.1/
# matlab
# and run at the matlab prompt:
# >> scores6
# >> [P_miss,P_fa] = Compute_DET(target, nontarget)
# >> Plot_DET(P_miss, P_fa, 'r')
# that particular result can be compared with the short2-short3 results here
# http://www.itl.nist.gov/iad/mig/tests/sre/2008/official_results/
# particularly the telephone-train, telephone-test condition which is here:
# http://www.itl.nist.gov/iad/mig/tests/sre/2008/official_results/dets/short2-short3.allPrimarySytems.16.det.png
# Note: this is the only condition we seem to be doing comparable to the systems there,
# presumably because it matches the training data we used.
#!/bin/bash
# Copyright 2015 David Snyder
# Apache 2.0.
#
# This script trains an LDA transform, applies it to the enroll and
# test i-vectors and does cosine scoring.
#
# Options (set via parse_options.sh):
#   --use-existing-models <true|false>  if true, skip training and require
#                                       mean.vec and transform.mat in <lda-ivec-dir>
#   --lda-dim <n>                       passed to ivector-compute-lda as --dim
#   --covar-factor <f>                  passed as --total-covariance-factor
#
# Arguments:
#   <lda-data-dir>      its utt2spk supplies the speaker labels for LDA training
#   <enroll-data-dir> <test-data-dir>   accepted for interface compatibility,
#                                       not read by this script
#   <lda-ivec-dir>      ivector.scp is read, transform.mat and log/lda.log written
#   <enroll-ivec-dir>   spk_ivector.scp is read
#   <test-ivec-dir>     ivector.scp is read
#   <trials-file>       first two columns of each line are used
#   <scores-dir>        created if needed; receives lda_scores

use_existing_models=false
lda_dim=150
covar_factor=0.1

echo "$0 $@" # Print the command line for logging

if [ -f path.sh ]; then . ./path.sh; fi
. parse_options.sh || exit 1;

if [ $# != 8 ]; then
  echo "Usage: $0 <lda-data-dir> <enroll-data-dir> <test-data-dir> <lda-ivec-dir> <enroll-ivec-dir> <test-ivec-dir> <trials-file> <scores-dir>"
  # Stop here: continuing with missing arguments would train and score on
  # empty paths.
  exit 1
fi

lda_data_dir=$1
enroll_data_dir=$2
test_data_dir=$3
lda_ivec_dir=$4
enroll_ivec_dir=$5
test_ivec_dir=$6
trials=$7
scores_dir=$8

if [ "$use_existing_models" == "true" ]; then
  for f in "${lda_ivec_dir}/mean.vec" "${lda_ivec_dir}/transform.mat"; do
    [ ! -f "$f" ] && echo "No such file $f" && exit 1;
  done
else
  # The stderr redirect below fails if the log directory is missing.
  mkdir -p "${lda_ivec_dir}/log" || exit 1
  # Training i-vectors are length-normalized on the fly before LDA estimation.
  ivector-compute-lda --dim="$lda_dim" --total-covariance-factor="$covar_factor" \
    "ark:ivector-normalize-length scp:${lda_ivec_dir}/ivector.scp ark:- |" \
    "ark:${lda_data_dir}/utt2spk" \
    "${lda_ivec_dir}/transform.mat" 2>"${lda_ivec_dir}/log/lda.log" || exit 1
fi

mkdir -p "$scores_dir" || exit 1

# Both sides are projected with the LDA transform and length-normalized, so
# the dot product computed below is a cosine score.
ivector-compute-dot-products "cat '$trials' | awk '{print \$1, \$2}' |" \
  "ark:ivector-transform ${lda_ivec_dir}/transform.mat scp:${enroll_ivec_dir}/spk_ivector.scp ark:- | ivector-normalize-length ark:- ark:- |" \
  "ark:ivector-transform ${lda_ivec_dir}/transform.mat scp:${test_ivec_dir}/ivector.scp ark:- | ivector-normalize-length ark:- ark:- |" \
  "$scores_dir/lda_scores"
#!/usr/bin/perl
#
# Copyright 2015 David Snyder
# Apache 2.0.
# Usage: make_sre.pl <path-to-data> <name-of-source> <sre-ref> <output-dir>
#
# Writes wav.scp, utt2spk and spk2gender into <output-dir> for every line of
# <sre-ref> whose source name equals <name-of-source> and whose utterance id
# has a matching *.sph file somewhere below <path-to-data>.

unless (@ARGV == 4) {
  print STDERR "Usage: $0 <path-to-data> <name-of-source> <sre-ref> <output-dir>\n";
  print STDERR "e.g. $0 /export/corpora5/LDC/LDC2006S44 sre2004 sre_ref data/sre2004\n";
  exit(1);
}

my ($db_base, $sre_name, $sre_ref_filename, $out_dir) = @ARGV;
my $tmp_dir = "$out_dir/tmp";

system("mkdir -p $tmp_dir") == 0
  or die "Error making directory $tmp_dir";
system("find $db_base -name '*.sph' > $tmp_dir/sph.list") == 0
  or die "Error getting list of sph files";

# Index the audio files: file name up to its first "." -> full path.
my %utt2sph;
open(my $sph_list, "<", "$tmp_dir/sph.list") or die "cannot open wav list";
while (my $sph = <$sph_list>) {
  chomp $sph;
  my @path_parts = split("/", $sph);
  my @name_parts = split("[./]", $path_parts[-1]);
  $utt2sph{$name_parts[0]} = $sph;
}

open(my $gndr_out, ">", "$out_dir/spk2gender") or die "Could not open the output file $out_dir/spk2gender";
open(my $spkr_out, ">", "$out_dir/utt2spk") or die "Could not open the output file $out_dir/utt2spk";
open(my $wav_out, ">", "$out_dir/wav.scp") or die "Could not open the output file $out_dir/wav.scp";
open(my $sre_ref, "<", $sre_ref_filename) or die "Cannot open SRE reference.";

# Reference lines: <speaker> <gender> <source-name> <utt-id> <channel>
my %spk2gender;
while (my $ref_line = <$sre_ref>) {
  chomp $ref_line;
  my ($speaker, $gender, $other_sre_name, $utt_id, $channel) = split(" ", $ref_line);

  # Keep only entries of the requested source that have audio on disk.
  next unless ($other_sre_name eq $sre_name && exists $utt2sph{$utt_id});

  # Channel "A" is extracted as channel 1, anything else as channel 2.
  my $channel_num = ($channel eq "A") ? "1" : "2";
  my $spk_id = "$speaker-$gender";
  my $full_utt_id = "$spk_id-$sre_name-$utt_id-$channel";

  $spk2gender{$spk_id} = $gender;
  print $wav_out "$full_utt_id sph2pipe -f wav -p -c $channel_num $utt2sph{$utt_id} |\n";
  print $spkr_out "$full_utt_id $spk_id\n";
}

for my $spk_id (keys %spk2gender) {
  print $gndr_out "$spk_id $spk2gender{$spk_id}\n";
}

close($gndr_out) || die;
close($spkr_out) || die;
close($wav_out) || die;
close($sre_ref) || die;
#!/bin/bash
# Copyright 2015 David Snyder
# Apache 2.0.
#
# See README.txt for more info on data required.
#
# Usage: <this-script> <data-dir>
# Downloads the speaker list into data/local/, runs local/make_sre.pl once per
# SRE corpus (the corpus locations are hard-coded below), and combines the
# resulting directories into <data-dir>/sre.

set -e

# Without a data directory every output path below would start at "/".
if [ $# -lt 1 ] || [ -z "$1" ]; then
  echo "Usage: $0 <data-dir>"
  exit 1
fi

data_dir=$1

wget -P data/local/ http://www.openslr.org/resources/15/speaker_list.tgz
tar -C data/local/ -xvf data/local/speaker_list.tgz
sre_ref=data/local/speaker_list

local/make_sre.pl /export/corpora5/LDC/LDC2006S44/ \
  sre2004 "$sre_ref" "$data_dir/sre2004"

local/make_sre.pl /export/corpora5/LDC/LDC2011S01 \
  sre2005 "$sre_ref" "$data_dir/sre2005_train"

local/make_sre.pl /export/corpora5/LDC/LDC2011S04 \
  sre2005 "$sre_ref" "$data_dir/sre2005_test"

local/make_sre.pl /export/corpora5/LDC/LDC2011S09 \
  sre2006 "$sre_ref" "$data_dir/sre2006_train"

local/make_sre.pl /export/corpora5/LDC/LDC2011S10 \
  sre2006 "$sre_ref" "$data_dir/sre2006_test_1"

local/make_sre.pl /export/corpora5/LDC/LDC2012S01 \
  sre2006 "$sre_ref" "$data_dir/sre2006_test_2"

local/make_sre.pl /export/corpora5/LDC/LDC2011S05 \
  sre2008 "$sre_ref" "$data_dir/sre2008_train"

local/make_sre.pl /export/corpora5/LDC/LDC2011S08 \
  sre2008 "$sre_ref" "$data_dir/sre2008_test"

utils/combine_data.sh "$data_dir/sre" \
  "$data_dir/sre2004" "$data_dir/sre2005_train" \
  "$data_dir/sre2005_test" "$data_dir/sre2006_train" \
  "$data_dir/sre2006_test_1" "$data_dir/sre2006_test_2" \
  "$data_dir/sre2008_train" "$data_dir/sre2008_test"

utils/validate_data_dir.sh --no-text --no-feats "$data_dir/sre"
utils/fix_data_dir.sh "$data_dir/sre"

# The glob requires a "." after "speaker_list", so this removes the downloaded
# archive but not the extracted data/local/speaker_list itself.
rm data/local/speaker_list.*
#!/usr/bin/perl
#
# Copyright 2015 David Snyder
# Apache 2.0.
# Usage: make_sre_2010_test.pl /export/corpora5/SRE/SRE2010/eval/ data/.
#
# Creates <path-to-output>/sre10_test containing a trials file plus wav.scp,
# utt2spk, spk2gender and spk2utt for the test segments of the selected trials.

unless (@ARGV == 2) {
  print STDERR "Usage: $0 <path-to-SRE2010-eval> <path-to-output>\n";
  print STDERR "e.g. $0 /export/corpora5/SRE/SRE2010/eval/ data\n";
  exit(1);
}

my ($db_base, $out_base_dir) = @ARGV;
my $out_dir = "$out_base_dir/sre10_test";
my $tmp_dir = "$out_dir/tmp";

# Creating the tmp directory also creates $out_dir, which the opens below need.
system("mkdir -p $tmp_dir") == 0
  or die "Error making directory $tmp_dir";

open(my $key_in, "<", "$db_base/keys/coreext-coreext.trialkey.csv") or die "cannot open trials list";
open(my $trials_out, ">", "$out_dir/trials") or die "cannot open trials list";

# Pass 1: select the trials that are flagged "Y" both in the is-scored column
# and in the fifth condition column, and remember their speaker/segment pairs.
my %trials;
while (my $row = <$key_in>) {
  chomp $row;
  my @fields = split(",", $row);
  my ($spkr, $utt, $side, $is_target, $is_scored) = @fields[0 .. 4];
  my $c5 = $fields[9];  # columns 5..13 hold conditions 1..9
  $side = uc $side;
  next unless ($is_scored eq 'Y' && $c5 eq 'Y');
  $trials{"${spkr} ${utt}_${side}"} = 1;
  print $trials_out "$spkr ${utt}_${side} $is_target\n";
}
close($trials_out) || die;
close($key_in) || die;

open(my $ndx_in, "<", "$db_base/trials/coreext-coreext.ndx") or die "cannot open wav list";
open(my $gndr_out, ">", "$out_dir/spk2gender") or die "Could not open the output file $out_dir/spk2gender";
open(my $spkr_out, ">", "$out_dir/utt2spk") or die "Could not open the output file $out_dir/utt2spk";
open(my $wav_out, ">", "$out_dir/wav.scp") or die "Could not open the output file $out_dir/wav.scp";

# Pass 2: emit one entry per distinct test segment that occurs in a selected
# trial. Each segment acts as its own speaker in utt2spk and spk2gender.
my %channel_of = ("A" => 1, "B" => 2);
my %seen_utt;
while (my $entry = <$ndx_in>) {
  chomp $entry;
  my ($spkr, $gender, $wav_and_side) = split(" ", $entry);
  my ($wav, $side) = split(":", $wav_and_side);
  $wav = "${db_base}/data/${wav}";

  my $basename = (split("/", $wav))[-1];
  (my $raw_basename = $basename) =~ s/\.sph$// || die "bad basename $basename";
  my $uttId = $raw_basename . "_" . $side;

  next if (!exists($trials{"${spkr} ${uttId}"}) || exists($seen_utt{$uttId}));
  $seen_utt{$uttId} = 1;

  my $channel = $channel_of{$side};
  die "unknown channel $side\n" unless defined $channel;

  print $wav_out "$uttId sph2pipe -f wav -p -c $channel $wav |\n";
  print $spkr_out "$uttId $uttId\n";
  print $gndr_out "$uttId $gender\n";
}
close($gndr_out) || die;
close($spkr_out) || die;
close($wav_out) || die;
close($ndx_in) || die;

system("utils/utt2spk_to_spk2utt.pl $out_dir/utt2spk >$out_dir/spk2utt") == 0
  or die "Error creating spk2utt file in directory $out_dir";

# The exit status of fix_data_dir.sh is not checked; only validation is fatal.
system("utils/fix_data_dir.sh $out_dir");

system("utils/validate_data_dir.sh --no-text --no-feats $out_dir") == 0
  or die "Error validating directory $out_dir";
#!/usr/bin/perl
#
# Copyright 2015 David Snyder
# Apache 2.0.
# Usage: make_sre_2010_train.pl /export/corpora5/SRE/SRE2010/eval/ data/.
#
# Creates <path-to-output>/sre10_train with wav.scp, utt2spk, spk2gender and
# spk2utt, one utterance per line of <path-to-SRE2010-eval>/train/coreext.trn.

unless (@ARGV == 2) {
  print STDERR "Usage: $0 <path-to-SRE2010-eval> <path-to-output>\n";
  print STDERR "e.g. $0 /export/corpora5/SRE/SRE2010/eval/ data\n";
  exit(1);
}

my ($db_base, $out_base_dir) = @ARGV;
my $out_dir = "$out_base_dir/sre10_train";
my $tmp_dir = "$out_dir/tmp";

# Creating the tmp directory also creates $out_dir, which the opens below need.
system("mkdir -p $tmp_dir") == 0
  or die "Error making directory $tmp_dir";

open(my $trn_in, "<", "$db_base/train/coreext.trn") or die "cannot open wav list";
open(my $gndr_out, ">", "$out_dir/spk2gender") or die "Could not open the output file $out_dir/spk2gender";
open(my $spkr_out, ">", "$out_dir/utt2spk") or die "Could not open the output file $out_dir/utt2spk";
open(my $wav_out, ">", "$out_dir/wav.scp") or die "Could not open the output file $out_dir/wav.scp";

# Input lines: <speaker> <gender> <relative-path>.sph:<side>
my %channel_of = ("A" => 1, "B" => 2);
while (my $entry = <$trn_in>) {
  chomp $entry;
  my ($spkr, $gender, $wav_and_side) = split(" ", $entry);
  my ($wav, $side) = split(":", $wav_and_side);

  # The base name is taken from the relative path, before prefixing it.
  my $basename = (split("/", $wav))[-1];
  $wav = "$db_base/data/$wav";
  (my $raw_basename = $basename) =~ s/\.sph$// || die "bad basename $basename";

  # prefix spkr-id to utt-id to ensure sorted order.
  my $uttId = "${spkr}${gender}-${raw_basename}_${side}";

  my $channel = $channel_of{$side};
  die "unknown channel $side\n" unless defined $channel;

  # One spk2gender line is written per utterance, so speakers may repeat.
  print $gndr_out "$spkr $gender\n";
  print $wav_out "$uttId sph2pipe -f wav -p -c $channel $wav |\n";
  print $spkr_out "$uttId $spkr\n";
}
close($gndr_out) || die;
close($spkr_out) || die;
close($wav_out) || die;
close($trn_in) || die;

system("utils/utt2spk_to_spk2utt.pl $out_dir/utt2spk >$out_dir/spk2utt") == 0
  or die "Error creating spk2utt file in directory $out_dir";

# The exit status of fix_data_dir.sh is not checked; only validation is fatal.
system("utils/fix_data_dir.sh $out_dir");

system("utils/validate_data_dir.sh --no-text --no-feats $out_dir") == 0
  or die "Error validating directory $out_dir";
#!/usr/bin/perl
#
# Copyright 2013 Daniel Povey
# Apache 2.0
#
# Usage: <this-script> <path-to-LDC99S79> <path-to-output>
#
# Writes wav.scp, utt2spk, spk2gender and spk2utt into <path-to-output>.
# DISC1/doc/callstat.tbl supplies the two speaker ids and genders of each call
# (comma-separated fields 2-5, counting from 0); DISC1/doc/callinfo.tbl
# supplies the recording name. Each call yields two utterances, one per audio
# channel, with speaker ids prefixed by "sp2_".

if (@ARGV != 2) {
  print STDERR "Usage: $0 <path-to-LDC99S79> <path-to-output>\n";
  print STDERR "e.g. $0 /export/corpora5/LDC/LDC99S79 data/swbd2_phase2_train\n";
  exit(1);
}
($db_base, $out_dir) = @ARGV;

if (system("mkdir -p $out_dir")) {
  die "Error making directory $out_dir";
}

open(CS, "<", "$db_base/DISC1/doc/callstat.tbl") || die "Could not open $db_base/DISC1/doc/callstat.tbl";
open(CI, "<", "$db_base/DISC1/doc/callinfo.tbl") || die "Could not open $db_base/DISC1/doc/callinfo.tbl";
open(GNDR, ">", "$out_dir/spk2gender") || die "Could not open the output file $out_dir/spk2gender";
open(SPKR, ">", "$out_dir/utt2spk") || die "Could not open the output file $out_dir/utt2spk";
open(WAV, ">", "$out_dir/wav.scp") || die "Could not open the output file $out_dir/wav.scp";

@badAudio = ("3", "4");

# Keep the scratch file list inside the output directory. The previous
# "$out_base/tmp" used an undefined variable and therefore resolved to the
# shared "/tmp", where concurrent runs would overwrite each other's sph.list.
$tmp_dir = "$out_dir/tmp";
if (system("mkdir -p $tmp_dir") != 0) {
  die "Error making directory $tmp_dir";
}

if (system("find $db_base -name '*.sph' > $tmp_dir/sph.list") != 0) {
  die "Error getting list of sph files";
}

open(WAVLIST, "<", "$tmp_dir/sph.list") or die "cannot open wav list";

# Index the audio files: file name up to its first "." -> full path.
while(<WAVLIST>) {
  chomp;
  $sph = $_;
  @t = split("/",$sph);
  @t1 = split("[./]",$t[$#t]);
  $uttId=$t1[0];
  $wav{$uttId} = $sph;
}

while (<CS>) {
  $line = $_ ;
  # Two callinfo.tbl lines are consumed per callstat.tbl line; only the
  # second is used (presumably one line per call side - confirm).
  $ci = <CI>;
  $ci = <CI>;
  @ci = split(",",$ci);
  $wav = $ci[0];
  @A = split(",", $line);
  # NOTE(review): this smartmatch tests the regex /$wav/i against the elements
  # of @badAudio ("3", "4"); it looks unlikely ever to be true, so presumably
  # no call is actually skipped here - confirm the intended bad-audio filter.
  if (/$wav/i ~~ @badAudio) {
    # do nothing
  } else {
    $spkr1= "sp2_" . $A[2];
    $spkr2= "sp2_" . $A[3];
    $gender1 = $A[4];
    $gender2 = $A[5];
    if ($gender1 eq "M") {
      $gender1 = "m";
    } elsif ($gender1 eq "F") {
      $gender1 = "f";
    } else {
      die "Unknown Gender in $line";
    }
    if ($gender2 eq "M") {
      $gender2 = "m";
    } elsif ($gender2 eq "F") {
      $gender2 = "f";
    } else {
      die "Unknown Gender in $line";
    }
    if (-e "$wav{$wav}") {
      # First speaker is taken from channel 1 of the recording.
      $uttId = $spkr1 ."_" . $wav ."_1";
      if (!$spk2gender{$spkr1}) {
        $spk2gender{$spkr1} = $gender1;
        print GNDR "$spkr1"," $gender1\n";
      }
      print WAV "$uttId"," sph2pipe -f wav -p -c 1 $wav{$wav} |\n";
      print SPKR "$uttId"," $spkr1","\n";

      # Second speaker is taken from channel 2 of the same recording.
      $uttId = $spkr2 . "_" . $wav ."_2";
      if (!$spk2gender{$spkr2}) {
        $spk2gender{$spkr2} = $gender2;
        print GNDR "$spkr2"," $gender2\n";
      }
      print WAV "$uttId"," sph2pipe -f wav -p -c 2 $wav{$wav} |\n";
      print SPKR "$uttId"," $spkr2","\n";
    } else {
      print STDERR "Missing $wav{$wav} for $wav\n";
    }
  }
}

close(WAV) || die;
close(SPKR) || die;
close(GNDR) || die;
if (system("utils/utt2spk_to_spk2utt.pl $out_dir/utt2spk >$out_dir/spk2utt") != 0) {
  die "Error creating spk2utt file in directory $out_dir";
}
if (system("utils/fix_data_dir.sh $out_dir") != 0) {
  die "Error fixing data dir $out_dir";
}
if (system("utils/validate_data_dir.sh --no-text --no-feats $out_dir") != 0) {
  die "Error validating directory $out_dir";
}
#!/usr/bin/perl
#
# Copyright 2013 Daniel Povey
# Apache 2.0
if (@ARGV != 2) {
print STDERR "Usage: $0 <path-to-LDC2002S06> <path-to-output>\n";
print STDERR "e.g. $0 /export/corpora5/LDC/LDC2002S06 data/swbd2_phase3_train\n";
exit(1);
}
($db_base, $out_dir) = @ARGV;
if (system("mkdir -p $out_dir")) {
die "Error making directory $out_dir";
}
open(CS, "<$db_base/DISC1/docs/callstat.tbl") || die "Could not open $db_base/DISC1/docs/callstat.tbl";
open(GNDR, ">$out_dir/spk2gender") || die "Could not open the output file $out_dir/spk2gender";
open(SPKR, ">$out_dir/utt2spk") || die "Could not open the output file $out_dir/utt2spk";
open(WAV, ">$out_dir/wav.scp") || die "Could not open the output file $out_dir/wav.scp";
@badAudio = ("3", "4");
$tmp_dir = "$out_base/tmp";
if (system("mkdir -p $tmp_dir") != 0) {
die "Error making directory $tmp_dir";
}
if (system("find $db_base -name '*.sph' > $tmp_dir/sph.list") != 0) {
die "Error getting list of sph files";