Commit 352ccd58 authored by Dan Povey's avatar Dan Povey
Browse files

trunk: a lot of minor fixes, cosmetic changes and revisions (most...

trunk: a lot of minor fixes, cosmetic changes and revisions (most significantly: making conjugate gradient more robust.)

git-svn-id: https://svn.code.sf.net/p/kaldi/code/trunk@4681 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
parent cc769ca3
......@@ -52,4 +52,3 @@ if [ $stage -le 3 ]; then
steps/online/nnet2/extract_ivectors_online.sh --cmd "$train_cmd" --nj 4 \
data/train_max2 exp/nnet2_online/extractor exp/nnet2_online/ivectors || exit 1;
fi
......@@ -34,21 +34,20 @@ EOF
fi
parallel_opts="-l gpu=1"
num_threads=1
minibatch_size=512
else
# Use 4 nnet jobs just like run_4d_gpu.sh so the results should be
# almost the same, but this may be a little bit slow.
num_threads=16
minibatch_size=128
parallel_opts="-pe smp $num_threads"
fi
if [ $stage -le 1 ]; then
# use a wide beam because this is RM. These would be too high for other setups.
# the conf/decode.config gives it higher than normal beam/lattice-beam of (20,10), since
# otherwise on RM we'd get very thin lattices.
nj=30
num_threads=6
steps/online/nnet2/make_denlats.sh --cmd "$decode_cmd -l mem_free=1G,ram_free=1G -pe smp $num_threads" \
--nj $nj --sub-split 40 --num-threads "$num_threads" --beam 20.0 --lattice-beam 10.0 \
num_threads_denlats=6
steps/online/nnet2/make_denlats.sh --cmd "$decode_cmd -l mem_free=1G,ram_free=1G -pe smp $num_threads_denlats" \
--nj $nj --sub-split 40 --num-threads "$num_threads_denlats" --config conf/decode.config \
data/train data/lang $srcdir ${srcdir}_denlats || exit 1;
fi
......@@ -66,8 +65,9 @@ fi
if [ $stage -le 3 ]; then
# I tested the following with --max-temp-archives 3
# to test other branches of the code.
# the -tc 5 limits the I/O.
steps/online/nnet2/get_egs_discriminative2.sh \
--cmd "$decode_cmd -pe smp 5" \
--cmd "$decode_cmd -tc 5" \
--criterion $criterion --drop-frames $drop_frames \
data/train data/lang ${srcdir}{_ali,_denlats,,_degs} || exit 1;
fi
......
......@@ -75,7 +75,7 @@ def create_config_files(output_dir, params):
nnet_config = ["SpliceComponent input-dim={0} context={1} const-component-dim={2}".format(params.total_input_dim, contexts[0], params.ivector_dim),
"FixedAffineComponent matrix={0}".format(params.lda_mat),
"AffineComponentPreconditionedOnline input-dim={0} output-dim={1} {2} learning-rate={3} param-stddev={4} bias-stddev={5}".format(params.lda_dim, pnorm_input_dim, params.online_preconditioning_opts, params.initial_learning_rate, stddev, params.bias_stddev),
"PnormComponent input-dim={0} output-dim={1} p={2}".format(pnorm_input_dim, pnorm_output_dim, pnorm_p),
("PnormComponent input-dim={0} output-dim={1} p={2}".format(pnorm_input_dim, pnorm_output_dim, pnorm_p) if pnorm_input_dim != pnorm_output_dim else "RectifiedLinearComponent dim={0}".format(pnorm_input_dim)),
"NormalizeComponent dim={0}".format(pnorm_output_dim),
"AffineComponentPreconditionedOnline input-dim={0} output-dim={1} {2} learning-rate={3} param-stddev=0 bias-stddev=0".format(pnorm_output_dim, params.num_targets, params.online_preconditioning_opts, params.initial_learning_rate),
"SoftmaxComponent dim={0}".format(params.num_targets)]
......
......@@ -14,8 +14,9 @@
if [ $# != 1 ]; then
echo "Usage: $0 <egs-dir>"
echo "e.g.: $0 data/nnet4b/egs/"
echo "This script is usually equivalent to 'rm <egs-dir>/egs.*' but it handles following"
echo "soft links to <egs-dir>/storage/, and avoids deleting anything in the directory if"
echo "e.g.: $0 data/nnet4b_mpe/degs/"
echo "This script is usually equivalent to 'rm <egs-dir>/egs.* <egs-dir>/degs.*' but it follows"
echo "soft links to <egs-dir>/storage/; and it avoids deleting anything in the directory if"
echo "someone did 'touch <egs-dir>/.nodelete'"
exit 1;
fi
......@@ -28,22 +29,19 @@ if [ ! -d $egs ]; then
fi
if [ -f $egs/.nodelete ]; then
echo "$0: not delting egs in $egs since $egs/.nodelete exists"
echo "$0: not deleting egs in $egs since $egs/.nodelete exists"
exit 0;
fi
flist=$egs/egs.*.ark
if [ "$flist" == '$egs/egs.*.ark' ]; then # did not expand..
echo "$0: directory $egs does not seem to contain egs"
exit 1;
fi
for f in $flist; do
for f in $egs/egs.*.ark $egs/degs.*.ark; do
if [ -L $f ]; then
rm $(dirname $f)/$(readlink $f) # this will print a warning if it fails.
fi
rm $f
rm $f 2>/dev/null
done
......
......@@ -166,11 +166,13 @@ while [ $x -lt $num_iters ]; do
nnets_list=$(for n in $(seq $num_jobs_nnet); do echo $dir/$[$x+1].$n.mdl; done)
$cmd $dir/log/average.$x.log \
# below use run.pl instead of a generic $cmd for these very quick stages,
# so that we don't run the risk of waiting for a possibly hard-to-get GPU.
run.pl $dir/log/average.$x.log \
nnet-am-average $nnets_list $dir/$[$x+1].mdl || exit 1;
if $modify_learning_rates; then
$cmd $dir/log/modify_learning_rates.$x.log \
run.pl $dir/log/modify_learning_rates.$x.log \
nnet-modify-learning-rates --retroactive=$retroactive \
--last-layer-factor=$last_layer_factor \
--first-layer-factor=$first_layer_factor \
......
......@@ -22,8 +22,8 @@ if [ $# != 2 ]; then
echo "e.g.:"
echo " $0 --utts-per-spk-max 2 data/train data/train-max2"
echo "Options"
echo " --utts-per-spk-max=n # number of utterances per speaker maximum,"
echo " # default -1 (meaning no maximum). E.g. 2."
echo " --utts-per-spk-max <n> # number of utterances per speaker maximum,"
echo " # default -1 (meaning no maximum). E.g. 2."
exit 1;
fi
......
......@@ -12,6 +12,9 @@
# Begin configuration section.
stage=-5
exit_stage=-100 # you can use this to require it to exit at the
# beginning of a specific stage. Not all values are
# supported.
fmllr_update_type=full
cmd=run.pl
scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1"
......@@ -162,6 +165,8 @@ if [ $stage -le -1 ]; then
"ark:gunzip -c $alidir/ali.JOB.gz|" "ark:|gzip -c >$dir/ali.JOB.gz" || exit 1;
fi
[ "$exit_stage" -eq 0 ] && echo "$0: Exiting early: --exit-stage $exit_stage" && exit 1;
if [ $stage -le 0 ] && [ "$realign_iters" != "" ]; then
echo "$0: Compiling graphs of transcripts"
$cmd JOB=1:$nj $dir/log/compile_graphs.JOB.log \
......
......@@ -79,7 +79,7 @@ while true; do
# Check that Boolean-valued arguments are really Boolean.
if $was_bool && [[ "$2" != "true" && "$2" != "false" ]]; then
echo "$0: expected \"true\" or \"false\": --$name $2" 1>&2
echo "$0: expected \"true\" or \"false\": $1 $2" 1>&2
exit 1;
fi
shift 2;
......
......@@ -593,14 +593,23 @@ if (-e "$lang/L_disambig.fst") {
# Check if G.fst is ilabel sorted.
if (-e "$lang/G.fst") {
$cmd = "fstinfo $lang/G.fst | grep -E 'input label sorted.*y' > /dev/null";
$res = system(". ./path.sh; $cmd");
if ($res == 0) {
$text = ` ./path.sh; fstinfo $lang/G.fst`;
if ($text =~ m/input label sorted\s+y/) {
print "--> $lang/G.fst is ilabel sorted\n";
} else {
print "--> ERROR: $lang/G.fst is not ilabel sorted\n";
$exit = 1;
}
if ($text =~ m/# of states\s+(\d+)/) {
$num_states = $1;
if ($num_states == 0) {
print "--> ERROR: $lang/G.fst is empty\n";
$exit = 1;
} else {
print "--> $lang/G.fst has $num_states states\n";
}
}
}
# Check determinizability of G.fst
......
......@@ -29,9 +29,10 @@ int main(int argc, char *argv[]) {
typedef kaldi::int32 int32;
const char *usage =
"Do MLLT update\n"
"Do update for MLLT (also known as STC)\n"
"Usage: est-mllt [options] <mllt-mat-out> <stats-in1> <stats-in2> ... \n"
"e.g.: est-mllt 2.mat 1a.macc 1b.macc ... \n"
"where the stats are obtained from gmm-acc-mllt\n"
"Note: use compose-transforms <mllt-mat-out> <prev-mllt-mat> to combine with previous\n"
" MLLT or LDA transform, if any, and\n"
" gmm-transform-means to apply <mllt-mat-out> to GMM means.\n";
......
......@@ -567,8 +567,12 @@ template<class Arc> class DeterminizerStar {
// processes final-weights for this subset.
bool is_final = false;
StringId final_string = 0; // = 0 to keep compiler happy.
Weight final_weight;
typename vector<Element>::const_iterator iter = closed_subset.begin(), end = closed_subset.end();
Weight final_weight = Weight::One(); // This value will never be accessed, and
// we just set it to avoid spurious compiler warnings. We avoid setting it
// to Zero() because floating-point infinities can sometimes generate
// interrupts and slow things down.
typename vector<Element>::const_iterator iter = closed_subset.begin(),
end = closed_subset.end();
for (; iter != end; ++iter) {
const Element &elem = *iter;
Weight this_final_weight = ifst_->Final(elem.state);
......
......@@ -28,7 +28,8 @@ int main(int argc, char *argv[]) {
const char *usage =
"Sum multiple accumulated stats files for GMM training.\n"
"Usage: gmm-sum-accs [options] stats-out stats-in1 stats-in2 ...\n";
"Usage: gmm-sum-accs [options] <stats-out> <stats-in1> <stats-in2> ...\n"
"E.g.: gmm-sum-accs 1.acc 1.1.acc 1.2.acc\n";
bool binary = true;
kaldi::ParseOptions po(usage);
......
......@@ -117,10 +117,24 @@ void TestIvectorExtraction(const IvectorExtractor &extractor,
extractor.GetIvectorDistribution(utt_stats, &ivector1, NULL);
online_stats.GetIvector(-1, &ivector2);
int32 num_cg_iters = -1; // for testing purposes, compute it exactly.
online_stats.GetIvector(num_cg_iters, &ivector2);
KALDI_LOG << "ivector1 = " << ivector1;
KALDI_LOG << "ivector2 = " << ivector2;
// objf change vs. default iVector. note, here I'm using objf
// and auxf pretty much interchangeably :-(
double objf_change2 = online_stats.ObjfChange(ivector2) *
utt_stats.NumFrames();
Vector<double> ivector_baseline(ivector_dim);
ivector_baseline(0) = extractor.PriorOffset();
double objf_change1 = extractor.GetAuxf(utt_stats, ivector1) -
extractor.GetAuxf(utt_stats, ivector_baseline);
KALDI_LOG << "objf_change1 = " << objf_change1
<< ", objf_change2 = " << objf_change2;
KALDI_ASSERT(ivector1.ApproxEqual(ivector2));
}
......@@ -198,7 +212,7 @@ void UnitTestIvectorExtractor() {
int main() {
using namespace kaldi;
SetVerboseLevel(4);
SetVerboseLevel(5);
for (int i = 0; i < 10; i++)
UnitTestIvectorExtractor();
std::cout << "Test OK.\n";
......
......@@ -80,6 +80,8 @@ class IvectorExtractorUtteranceStats {
void Scale(double scale); // Used to apply acoustic scale.
double NumFrames() { return gamma_.Sum(); }
protected:
friend class IvectorExtractor;
friend class IvectorExtractorStats;
......@@ -331,7 +333,7 @@ class OnlineIvectorEstimationStats {
double PriorOffset() const { return prior_offset_; }
/// ObjfChange returns the change in objective function per frame from
/// ObjfChange returns the change in objective function *per frame* from
/// using the default value [ prior_offset_, 0, 0, ... ] to
/// using the provided value; should be >= 0, if "ivector" is
/// a value we estimated. This is for diagnostics.
......
......@@ -418,6 +418,9 @@ OptimizeLbfgs<Real>::GetValue(Real *objf_value) const {
return best_x_;
}
// to compute the alpha, we are minimizing f(x) = x^T b - 0.5 x_k^T A x_k along
// direction p_k... consider alpha
// d/dx of f(x) = b - A x_k = r.
// Notation based on Sec. 5.1 of Nocedal and Wright
// Computation based on Alg. 5.2 of Nocedal and Wright (Pg. 112)
......@@ -425,10 +428,10 @@ OptimizeLbfgs<Real>::GetValue(Real *objf_value) const {
// To solve Ax=b for x
// k : current iteration
// x_k : estimate of x (at iteration k)
// r_k : residual
// r_k : residual ( r_k \eqdef A x_k - b )
// \alpha_k : step size
// p_k : A-conjugate direction
// \beta_k : coefficient used in A-conjuagate direction computation for next
// \beta_k : coefficient used in A-conjugate direction computation for next
// iteration
//
// Algo. LinearCG(A,b,x_0)
......@@ -466,40 +469,63 @@ int32 LinearCgd(const LinearCgdOptions &opts,
r_initial_norm_sq = r_cur_norm_sq,
r_recompute_norm_sq = r_cur_norm_sq;
KALDI_VLOG(5) << "In linear CG: initial norm-square of residual = "
<< r_initial_norm_sq;
KALDI_ASSERT(opts.recompute_residual_factor <= 1.0);
Real max_error_sq = std::max<Real>(opts.max_error * opts.max_error,
std::numeric_limits<Real>::min()),
residual_factor = opts.recompute_residual_factor *
opts.recompute_residual_factor;
opts.recompute_residual_factor,
inv_residual_factor = 1.0 / residual_factor;
// Note: although from a mathematical point of view the method should converge
// after M iterations, in practice (due to roundoff) it does not always
// converge to good precision after that many iterations so we let the maximum
// be 1.5 * M + 5 instead.
// be M + 5 instead.
int32 k = 0;
for (; k < M + M / 2 + 5 && k != opts.max_iters; k++) {
for (; k < M + 5 && k != opts.max_iters; k++) {
// Note: we'll break from this loop if we converge sooner due to
// max_error.
Ap.AddSpVec(1.0, A, p, 0.0); // Ap = A p
// next line: \alpha_k = (r_k^T r_k) / (p_k^T A p_k)
Real alpha = r_cur_norm_sq / VecVec(p, Ap);
// Below is how the code used to look.
// // next line: \alpha_k = (r_k^T r_k) / (p_k^T A p_k)
// Real alpha = r_cur_norm_sq / VecVec(p, Ap);
//
// We changed r_cur_norm_sq below to -VecVec(p, r). Although this is
// slightly less efficient, it seems to make the algorithm dramatically more
// robust. Note that -p^T r is the mathematically more natural quantity to
// use here, that corresponds to minimizing along that direction... r^T r is
// recommended in Nocedal and Wright only as a kind of optimization as it is
// supposed to be the same as -p^T r and we already have it computed.
Real alpha = -VecVec(p, r) / VecVec(p, Ap);
// next line: x_{k+1} = x_k + \alpha_k p_k;
x->AddVec(alpha, p);
// next line: r_{k+1} = r_k + \alpha_k A p_k
r.AddVec(alpha, Ap);
Real r_next_norm_sq = VecVec(r, r);
if (r_next_norm_sq < residual_factor * r_recompute_norm_sq) {
if (r_next_norm_sq < residual_factor * r_recompute_norm_sq ||
r_next_norm_sq > inv_residual_factor * r_recompute_norm_sq) {
// Recompute the residual from scratch if the residual norm has decreased
// a lot; this costs an extra matrix-vector multiply, but helps keep the
// residual accurate.
// Also do the same if the residual norm has increased a lot since
// the last time we recomputed... this shouldn't happen often, but
// it can indicate bad stuff is happening.
// r_{k+1} = A x_{k+1} - b
r.AddSpVec(1.0, A, *x, 0.0);
r.AddVec(-1.0, b);
r_next_norm_sq = VecVec(r, r);
r_recompute_norm_sq = r_next_norm_sq;
KALDI_VLOG(5) << "In linear CG: recomputing residual.";
}
KALDI_VLOG(5) << "In linear CG: r_next_norm_sq = " << r_next_norm_sq;
KALDI_VLOG(5) << "In linear CG: k = " << k
<< ", r_next_norm_sq = " << r_next_norm_sq;
// Check if converged.
if (r_next_norm_sq <= max_error_sq)
break;
......
......@@ -35,21 +35,25 @@ int main(int argc, char *argv[]) {
const char *usage =
"This program first identifies any affine or block affine layers that\n"
"are followed by pnorm and then renormalize layers. Then it rescales\n"
"those layers such that the parameter stddev is 1.0 after scaling.\n"
"those layers such that the parameter stddev is 1.0 after scaling\n"
"(the target stddev is configurable by the --stddev option).\n"
"If you supply the option --stddev-from=<model-filename>, it rescales\n"
"those layers to match the standard deviation of those in the specified\n"
"model.\n"
"those layers to match the standard deviations of corresponding layers\n"
"in the specified model.\n"
"\n"
"Usage: nnet-normalize-stddev [options] <model-in> <model-out>\n"
" e.g.: nnet-normalize-stddev final.mdl final.mdl\n";
bool binary_write = true;
BaseFloat stddev = 1.0;
std::string reference_model_filename;
ParseOptions po(usage);
po.Register("binary", &binary_write, "Write output in binary mode");
po.Register("stddev-from", &reference_model_filename, "Reference model");
po.Register("stddev", &stddev, "Target standard deviation that we normalize "
"to (note: is overridden by --stddev-from option, if supplied)");
po.Read(argc, argv);
if (po.NumArgs() != 2) {
......@@ -91,7 +95,7 @@ int main(int argc, char *argv[]) {
PnormComponent *pc = dynamic_cast<PnormComponent*>(component);
if (pc == NULL)
continue;
// Checks if the layer after the pnorm layer is a NormalizeComponent
// or a PowerComponent followed by a NormalizeComponent
component = &(am_nnet.GetNnet().GetComponent(c + 2));
......@@ -121,11 +125,11 @@ int main(int argc, char *argv[]) {
KALDI_ASSERT(am_nnet_ref.GetNnet().NumComponents() == am_nnet.GetNnet().NumComponents());
}
BaseFloat ref_stddev =0.0;
BaseFloat ref_stddev = 0.0;
// Normalizes the identified layers.
for (int32 c = 0; c < identified_components.size(); c++) {
ref_stddev = 0.0;
ref_stddev = stddev;
if (!reference_model_filename.empty()) {
kaldi::nnet2::Component *component =
&(am_nnet_ref.GetNnet().GetComponent(identified_components[c]));
......@@ -152,10 +156,7 @@ int main(int argc, char *argv[]) {
BaseFloat params_stddev = sqrt(VecVec(params, params)
/ static_cast<BaseFloat>(params.Dim()));
if (params_stddev > 0.0) {
if(ref_stddev > 0.0)
uc->Scale(ref_stddev / params_stddev);
else
uc->Scale(1.0 / params_stddev);
uc->Scale(ref_stddev / params_stddev);
KALDI_LOG << "Normalized component " << identified_components[c];
}
}
......
......@@ -97,7 +97,7 @@ void TestMultiSgmmEst(const std::vector<AmSgmm*> &models,
}
void UnitTestEstimateSgmm() {
int32 dim = 1 + kaldi::RandInt(0, 9); // random dimension of the gmm
int32 dim = 2 + kaldi::RandInt(0, 9); // random dimension of the gmm
int32 num_comp = 2 + kaldi::RandInt(0, 9); // random mixture size
kaldi::FullGmm full_gmm;
ut::InitRandFullGmm(dim, num_comp, &full_gmm);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment