Commit 44e344ce authored by Dan Povey
Browse files

trunk: cosmetic fixes; and extend capabilities of a couple of iVector-related command-line programs

git-svn-id: https://svn.code.sf.net/p/kaldi/code/trunk@4320 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
parent 851318fa
......@@ -121,6 +121,8 @@ if [ $# != 4 ]; then
echo " # interpolate parameters (the weights are learned with a validation set)"
echo " --first-component-power <power|1.0> # Power applied to output of first p-norm layer... setting this to"
echo " # 0.5 seems to help under some circumstances."
echo " --egs-opts <opts> # Extra options to pass to get_egs.sh"
echo " --lda-opts <opts> # Extra options to pass to get_lda.sh"
echo " --stage <stage|-9> # Used to run a partially-completed training process from somewhere in"
echo " # the middle."
......
......@@ -31,9 +31,11 @@ int main(int argc, char *argv[]) {
"Copy matrices, or archives of matrices (e.g. features or transforms)\n"
"Also see copy-feats which has other format options\n"
"\n"
"Usage: copy-matrix [options] (matrix-in-rspecifier|matrix-in-rxfilename) (matrix-out-wspecifier|matrix-out-wxfilename)\n"
"Usage: copy-matrix [options] <matrix-in-rspecifier> <matrix-out-wspecifier>\n"
" or: copy-matrix [options] <matrix-in-rxfilename> <matrix-out-wxfilename>\n"
" e.g.: copy-matrix --binary=false 1.mat -\n"
" copy-matrix ark:2.trans ark,t:-\n";
" copy-matrix ark:2.trans ark,t:-\n"
"See also: copy-feats\n";
bool binary = true;
ParseOptions po(usage);
......
......@@ -33,7 +33,7 @@ int main(int argc, char *argv[]) {
"and expects a single transform matrix rather than possibly a table of matrices\n"
"\n"
"Usage: transform-vec [options] <transform-rxfilename> <feats-rspecifier> <feats-wspecifier>\n"
"See also: transform-vec, est-pca\n";
"See also: transform-feats, est-pca\n";
ParseOptions po(usage);
......
......@@ -29,7 +29,8 @@ int main(int argc, char *argv[]) {
const char *usage =
"Copy features [and possibly change format]\n"
"Usage: copy-feats [options] (<in-rspecifier> <out-wspecifier> | <in-rxfilename> <out-wxfilename>)\n"
"Usage: copy-feats [options] <feature-rspecifier> <feature-wspecifier>\n"
"or: copy-feats [options] <feats-rxfilename> <feats-wxfilename>\n"
"e.g.: copy-feats ark:- ark,scp:foo.ark,foo.scp\n"
" or: copy-feats ark:foo.ark ark,t:txt.ark\n"
"See also: copy-matrix, copy-feats-to-htk, copy-feats-to-sphinx, select-feats,\n"
......
......@@ -70,12 +70,12 @@ void Plda::ComputeDerivedVars() {
P(u^p) = N(u^p | 0, I + \Psi)
i.e. it's distributed with zero mean and covariance (within + between).
The likelihood ratio we want is:
N(u^p | \frac{n \Psi}{n \Psi + I} \bar{u}^g, I + \frac{\Psi}{n \Psi + I})
N(u^p | \frac{n \Psi}{n \Psi + I} \bar{u}^g, I + \frac{\Psi}{n \Psi + I}) /
N(u^p | 0, I + \Psi)
where \bar{u}^g is the mean of the "gallery examples"; and we can expand the
log likelihood ratio as
- 0.5 (u^p - m) (I + n \Psi/(n \Psi + I))^{-1} (u^p - m)
+ 0.5 u^p (I + \Psi) u^p
- 0.5 [ (u^p - m) (I + \Psi/(n \Psi + I))^{-1} (u^p - m) + logdet(I + \Psi/(n \Psi + I)) ]
+ 0.5 [ u^p (I + \Psi)^{-1} u^p + logdet(I + \Psi) ]
where m = (n \Psi)/(n \Psi + I) \bar{u}^g.
*/
......@@ -90,7 +90,7 @@ double Plda::GetNormalizationFactor(
Vector<double> transformed_ivector_sq(transformed_ivector);
transformed_ivector_sq.ApplyPow(2.0);
// inv_covar will equal 1.0 / (\Psi + I/num_examples).
Vector<double> inv_covar(psi_);
Vector<double> inv_covar(psi_);
inv_covar.Add(1.0 / num_examples);
inv_covar.InvertElements();
// "transformed_ivector" should have covariance (\Psi + I/num_examples), i.e.
......
......@@ -42,7 +42,8 @@ int main(int argc, char *argv[]) {
"Usage: ivector-compute-dot-products [options] <trials-in> "
"<ivector1-rspecifier> <ivector2-rspecifier> <scores-out>\n"
"e.g.: \n"
" ivector-compute-dot-products trials ark:train_ivectors.scp ark:test_ivectors.scp trials.scored\n";
" ivector-compute-dot-products trials ark:train_ivectors.scp ark:test_ivectors.scp trials.scored\n"
"See also: ivector-plda-scoring\n";
ParseOptions po(usage);
......
// ivectorbin/ivector-mean.cc
// Copyright 2013 Daniel Povey
// Copyright 2013-2014 Daniel Povey
// See ../../COPYING for clarification regarding multiple authors
//
......@@ -27,98 +27,128 @@ int main(int argc, char *argv[]) {
typedef kaldi::int32 int32;
try {
const char *usage =
"Averages iVectors over all the utterances of each speaker\n"
"Input is a spk2utt file and a set of iVectors indexed by\n"
"With 3 or 4 arguments, averages iVectors over all the\n"
"utterances of each speaker using the spk2utt file.\n"
"Input the spk2utt file and a set of iVectors indexed by\n"
"utterance; output is iVectors indexed by speaker. If 4\n"
"arguments are given, the last one is a table for the number"
"of utterances per speaker (can be useful for PLDA)\n"
"arguments are given, extra argument is a table for the number\n"
"of utterances per speaker (can be useful for PLDA). If 2\n"
"arguments are given, computes the mean of all input files and\n"
"writes out the mean vector.\n"
"\n"
"Usage: ivector-mean <spk2utt-rspecifier> <ivector-rspecifier> "
"<ivector-wspecifier> [<num-utt-wspecifier>]\n"
"e.g.: ivector-mean data/spk2utt exp/ivectors.ark exp/spk_ivectors.ark exp/spk_num_utts.ark\n";
"or: ivector-mean <ivector-rspecifier> <mean-wxfilename>\n"
"e.g.: ivector-mean data/spk2utt exp/ivectors.ark exp/spk_ivectors.ark exp/spk_num_utts.ark\n"
"or: ivector-mean exp/ivectors.ark exp/mean.vec\n"
"See also: ivector-subtract-global-mean\n";
ParseOptions po(usage);
bool binary_write = false;
po.Register("binary", &binary_write, "If true, write output in binary "
"(only applicable when writing files, not archives/tables.");
po.Read(argc, argv);
if (po.NumArgs() < 3 || po.NumArgs() > 4) {
if (po.NumArgs() < 2 || po.NumArgs() > 4) {
po.PrintUsage();
exit(1);
}
std::string spk2utt_rspecifier = po.GetArg(1),
ivector_rspecifier = po.GetArg(2),
ivector_wspecifier = po.GetArg(3),
num_utts_wspecifier = po.GetOptArg(4);
if (po.NumArgs() == 2) {
// Compute the mean of the input vectors and write it out.
std::string ivector_rspecifier = po.GetArg(1),
mean_wxfilename = po.GetArg(2);
int32 num_done = 0;
SequentialBaseFloatVectorReader ivector_reader(ivector_rspecifier);
Vector<double> sum;
for (; !ivector_reader.Done(); ivector_reader.Next()) {
if (sum.Dim() == 0) sum.Resize(ivector_reader.Value().Dim());
sum.AddVec(1.0, ivector_reader.Value());
num_done++;
}
if (num_done == 0) {
KALDI_ERR << "No iVectors read";
} else {
sum.Scale(1.0 / num_done);
WriteKaldiObject(sum, mean_wxfilename, binary_write);
return 0;
}
} else {
std::string spk2utt_rspecifier = po.GetArg(1),
ivector_rspecifier = po.GetArg(2),
ivector_wspecifier = po.GetArg(3),
num_utts_wspecifier = po.GetOptArg(4);
double spk_sumsq = 0.0;
Vector<double> spk_sum;
double spk_sumsq = 0.0;
Vector<double> spk_sum;
int64 num_spk_done = 0, num_spk_err = 0,
num_utt_done = 0, num_utt_err = 0;
int64 num_spk_done = 0, num_spk_err = 0,
num_utt_done = 0, num_utt_err = 0;
RandomAccessBaseFloatVectorReader ivector_reader(ivector_rspecifier);
SequentialTokenVectorReader spk2utt_reader(spk2utt_rspecifier);
BaseFloatVectorWriter ivector_writer(ivector_wspecifier);
Int32Writer num_utts_writer(num_utts_wspecifier);
RandomAccessBaseFloatVectorReader ivector_reader(ivector_rspecifier);
SequentialTokenVectorReader spk2utt_reader(spk2utt_rspecifier);
BaseFloatVectorWriter ivector_writer(ivector_wspecifier);
Int32Writer num_utts_writer(num_utts_wspecifier);
for (; !spk2utt_reader.Done(); spk2utt_reader.Next()) {
std::string spk = spk2utt_reader.Key();
const std::vector<std::string> &uttlist = spk2utt_reader.Value();
if (uttlist.empty()) {
KALDI_ERR << "Speaker with no utterances.";
}
Vector<BaseFloat> spk_mean;
int32 utt_count = 0;
for (size_t i = 0; i < uttlist.size(); i++) {
std::string utt = uttlist[i];
if (!ivector_reader.HasKey(utt)) {
KALDI_WARN << "No iVector present in input for utterance " << utt;
num_utt_err++;
} else {
if (utt_count == 0) {
spk_mean = ivector_reader.Value(utt);
for (; !spk2utt_reader.Done(); spk2utt_reader.Next()) {
std::string spk = spk2utt_reader.Key();
const std::vector<std::string> &uttlist = spk2utt_reader.Value();
if (uttlist.empty()) {
KALDI_ERR << "Speaker with no utterances.";
}
Vector<BaseFloat> spk_mean;
int32 utt_count = 0;
for (size_t i = 0; i < uttlist.size(); i++) {
std::string utt = uttlist[i];
if (!ivector_reader.HasKey(utt)) {
KALDI_WARN << "No iVector present in input for utterance " << utt;
num_utt_err++;
} else {
spk_mean.AddVec(1.0, ivector_reader.Value(utt));
if (utt_count == 0) {
spk_mean = ivector_reader.Value(utt);
} else {
spk_mean.AddVec(1.0, ivector_reader.Value(utt));
}
num_utt_done++;
utt_count++;
}
num_utt_done++;
utt_count++;
}
if (utt_count == 0) {
KALDI_WARN << "Not producing output for speaker " << spk
<< " since no utterances had iVectors";
num_spk_err++;
} else {
spk_mean.Scale(1.0 / utt_count);
ivector_writer.Write(spk, spk_mean);
if (num_utts_wspecifier != "")
num_utts_writer.Write(spk, utt_count);
num_spk_done++;
spk_sumsq += VecVec(spk_mean, spk_mean);
if (spk_sum.Dim() == 0)
spk_sum.Resize(spk_mean.Dim());
spk_sum.AddVec(1.0, spk_mean);
}
}
if (utt_count == 0) {
KALDI_WARN << "Not producing output for speaker " << spk
<< " since no utterances had iVectors";
num_spk_err++;
} else {
spk_mean.Scale(1.0 / utt_count);
ivector_writer.Write(spk, spk_mean);
if (num_utts_wspecifier != "")
num_utts_writer.Write(spk, utt_count);
num_spk_done++;
spk_sumsq += VecVec(spk_mean, spk_mean);
if (spk_sum.Dim() == 0)
spk_sum.Resize(spk_mean.Dim());
spk_sum.AddVec(1.0, spk_mean);
}
}
KALDI_LOG << "Computed mean of " << num_spk_done << " speakers ("
<< num_spk_err << " with no utterances), consisting of "
<< num_utt_done << " utterances (" << num_utt_err
<< " absent from input).";
KALDI_LOG << "Computed mean of " << num_spk_done << " speakers ("
<< num_spk_err << " with no utterances), consisting of "
<< num_utt_done << " utterances (" << num_utt_err
<< " absent from input).";
if (num_spk_done != 0) {
spk_sumsq /= num_spk_done;
spk_sum.Scale(1.0 / num_spk_done);
double mean_length = spk_sum.Norm(2.0),
spk_length = sqrt(spk_sumsq),
norm_spk_length = spk_length / sqrt(spk_sum.Dim());
KALDI_LOG << "Norm of mean of speakers is " << mean_length
<< ", root-mean-square speaker-iVector length divided by "
<< "sqrt(dim) is " << norm_spk_length;
}
if (num_spk_done != 0) {
spk_sumsq /= num_spk_done;
spk_sum.Scale(1.0 / num_spk_done);
double mean_length = spk_sum.Norm(2.0),
spk_length = sqrt(spk_sumsq),
norm_spk_length = spk_length / sqrt(spk_sum.Dim());
KALDI_LOG << "Norm of mean of speakers is " << mean_length
<< ", root-mean-square speaker-iVector length divided by "
<< "sqrt(dim) is " << norm_spk_length;
}
return (num_spk_done != 0 ? 0 : 1);
return (num_spk_done != 0 ? 0 : 1);
}
} catch(const std::exception &e) {
std::cerr << e.what();
return -1;
......
......@@ -32,7 +32,7 @@ int main(int argc, char *argv[]) {
const char *usage =
"Normalize length of iVectors to equal sqrt(feature-dimension)\n"
"\n"
"Usage: ivector-normalize-length [options] <ivector-rspecifier>"
"Usage: ivector-normalize-length [options] <ivector-rspecifier> "
"<ivector-wspecifier>\n"
"e.g.: \n"
" ivector-normalize-length ark:ivectors.ark ark:normalized_ivectors.ark\n";
......
......@@ -44,8 +44,9 @@ int main(int argc, char *argv[]) {
"Usage: ivector-plda-scoring <plda> <train-ivector-rspecifier> <test-ivector-rspecifier>\n"
" <trials-rxfilename> <scores-wxfilename>\n"
"\n"
"e.g.: ivector-plda-scoring --num-utts=exp/train/num_utts.ark plda "
"exp/train/spk_ivectors.ark exp/test/ivectors.ark - -\n";
"e.g.: ivector-plda-scoring --num-utts=ark:exp/train/num_utts.ark plda "
"ark:exp/train/spk_ivectors.ark ark:exp/test/ivectors.ark trials scores\n"
"See also: ivector-compute-dot-products, ivector-compute-plda\n";
ParseOptions po(usage);
......@@ -166,12 +167,12 @@ int main(int argc, char *argv[]) {
}
std::string key1 = fields[0], key2 = fields[1];
if (train_ivectors.count(key1) == 0) {
KALDI_WARN << "Key " << key1 << " not present in training iectors.";
KALDI_WARN << "Key " << key1 << " not present in training iVectors.";
num_trials_err++;
continue;
}
if (test_ivectors.count(key2) == 0) {
KALDI_WARN << "Key " << key2 << " not present in test ivectors.";
KALDI_WARN << "Key " << key2 << " not present in test iVectors.";
num_trials_err++;
continue;
}
......
......@@ -28,10 +28,14 @@ int main(int argc, char *argv[]) {
try {
const char *usage =
"Copies a table of iVectors but subtracts the global mean as\n"
"it does so.\n"
"it does so. The mean may be specified as the first argument; if not,\n"
"the sum of the input iVectors is used.\n"
"\n"
"Usage: ivector-subtract-global-mean <ivector-rspecifier> <ivector-wspecifier>\n"
"e.g.: ivector-subtract-global-mean scp:ivectors.scp ark:-\n";
"or: ivector-subtract-global-mean <mean-rxfliename> <ivector-rspecifier> <ivector-wspecifier>\n"
"e.g.: ivector-subtract-global-mean scp:ivectors.scp ark:-\n"
"or: ivector-subtract-global-mean mean.vec scp:ivectors.scp ark:-\n"
"See also: ivector-mean\n";
ParseOptions po(usage);
......@@ -41,51 +45,69 @@ int main(int argc, char *argv[]) {
po.Read(argc, argv);
if (po.NumArgs() != 2) {
if (po.NumArgs() < 2 || po.NumArgs() > 3) {
po.PrintUsage();
exit(1);
}
int64 num_done = 0;
std::string ivector_rspecifier = po.GetArg(1),
ivector_wspecifier = po.GetArg(2);
if (po.NumArgs() == 2) {
std::string ivector_rspecifier = po.GetArg(1),
ivector_wspecifier = po.GetArg(2);
Vector<double> sum;
Vector<double> sum;
int64 num_done = 0;
std::vector<std::pair<std::string, Vector<BaseFloat>*> > ivectors;
std::vector<std::pair<std::string, Vector<BaseFloat>*> > ivectors;
SequentialBaseFloatVectorReader ivector_reader(ivector_rspecifier);
BaseFloatVectorWriter ivector_writer(ivector_wspecifier);
SequentialBaseFloatVectorReader ivector_reader(ivector_rspecifier);
BaseFloatVectorWriter ivector_writer(ivector_wspecifier);
for (; !ivector_reader.Done(); ivector_reader.Next()) {
std::string key = ivector_reader.Key();
const Vector<BaseFloat> &ivector = ivector_reader.Value();
if (sum.Dim() == 0) sum.Resize(ivector.Dim());
sum.AddVec(1.0, ivector);
num_done++;
ivectors.push_back(std::make_pair(key, new Vector<BaseFloat>(ivector)));
}
for (; !ivector_reader.Done(); ivector_reader.Next()) {
std::string key = ivector_reader.Key();
const Vector<BaseFloat> &ivector = ivector_reader.Value();
if (sum.Dim() == 0) sum.Resize(ivector.Dim());
sum.AddVec(1.0, ivector);
num_done++;
ivectors.push_back(std::make_pair(key, new Vector<BaseFloat>(ivector)));
}
KALDI_LOG << "Read " << num_done << " iVectors.";
KALDI_LOG << "Read " << num_done << " iVectors.";
if (num_done != 0) {
KALDI_LOG << "Norm of iVector mean was " << (sum.Norm(2.0) / num_done);
for (size_t i = 0; i < ivectors.size(); i++) {
std::string key = ivectors[i].first;
Vector<BaseFloat> *ivector = ivectors[i].second;
if (subtract_mean)
ivector->AddVec(-1.0 / num_done, sum);
ivector_writer.Write(key, *ivector);
delete ivector;
ivectors[i].second = NULL;
if (num_done != 0) {
KALDI_LOG << "Norm of iVector mean was " << (sum.Norm(2.0) / num_done);
for (size_t i = 0; i < ivectors.size(); i++) {
std::string key = ivectors[i].first;
Vector<BaseFloat> *ivector = ivectors[i].second;
if (subtract_mean)
ivector->AddVec(-1.0 / num_done, sum);
ivector_writer.Write(key, *ivector);
delete ivector;
ivectors[i].second = NULL;
}
}
} else {
// po.NumArgs() == 3
std::string mean_rxfilename = po.GetArg(1),
ivector_rspecifier = po.GetArg(2),
ivector_wspecifier = po.GetArg(3);
Vector<BaseFloat> mean;
ReadKaldiObject(mean_rxfilename, &mean);
SequentialBaseFloatVectorReader ivector_reader(ivector_rspecifier);
BaseFloatVectorWriter ivector_writer(ivector_wspecifier);
for (; !ivector_reader.Done(); ivector_reader.Next()) {
std::string key = ivector_reader.Key();
Vector<BaseFloat> ivector = ivector_reader.Value();
ivector.AddVec(-1.0, mean);
ivector_writer.Write(key, ivector);
num_done++;
}
KALDI_LOG << "Wrote mean-subtracted iVectors";
}
KALDI_LOG << "Wrote " << num_done << " mean-subtracted iVectors";
return (num_done != 0 ? 0 : 1);
} catch(const std::exception &e) {
std::cerr << e.what();
return -1;
......
......@@ -297,7 +297,8 @@ void CompressedMatrix::Write(std::ostream &os, bool binary) const {
this->CopyToMat(&temp_mat);
temp_mat.Write(os, binary);
#else
// Text-mode writing. Only really useful for debug, but we'll implement it.
// Text-mode writing out of the raw data. Only really useful for debug, but
// we'll implement it.
if (data_ == NULL) {
os << 0.0 << ' ' << 0.0 << ' ' << 0 << ' ' << 0 << '\n';
} else {
......
......@@ -253,7 +253,7 @@ void TestSgmm2PreXform(const AmSgmm2 &sgmm) {
kaldi::Vector<BaseFloat> res_vec(dim, kaldi::kSetZero);
res_vec.AddMatVec(1.0, a_inv, kaldi::kNoTrans, b_pre, 0.0);
res_vec.AddVec(1.0, b_inv);
KALDI_ASSERT(res_vec.IsZero(1.0e-6));
KALDI_ASSERT(res_vec.IsZero(1.0e-5));
}
void UnitTestSgmm2() {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment