Commit 2fb7df1e authored by Dan Povey's avatar Dan Povey
Browse files

Various changes needed to scale up, inc. change to clustering code; fixes to...

Various changes needed to scale up, inc. change to clustering code; fixes to scripts regarding the s,cs options; improvements to queue.pl

git-svn-id: https://svn.code.sf.net/p/kaldi/code/trunk@618 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
parent e5a652c4
......@@ -114,7 +114,6 @@ steps/align_lda_mllt_sat.sh --num-jobs 30 --cmd "$train_cmd" \
steps/train_lda_mllt_sat.sh --num-jobs 30 --cmd "$train_cmd" \
4000 150000 data/train_nodup data/lang exp/tri4a_ali_all_nodup exp/tri5a
scripts/mkgraph.sh data/lang_test exp/tri5a exp/tri5a/graph
scripts/decode.sh --num-jobs 10 --cmd "$decode_cmd" steps/decode_lda_mllt_sat.sh exp/tri5a/graph \
data/eval2000 exp/tri5a/decode_eval2000
......
......@@ -209,14 +209,16 @@ scripts/decode.sh --cmd "$decode_cmd" steps/decode_lda_mllt_sat.sh exp/tri4b/gra
scripts/decode.sh --cmd "$decode_cmd" steps/decode_lda_mllt_sat.sh exp/tri4b/graph_tgpr data/test_eval92 exp/tri4b/decode_tgpr_eval92
# Train and test MMI, and boosted MMI, on tri4b.
steps/align_lda_mllt_sat.sh --num-jobs 30 --cmd "$train_cmd" \
# Making num-jobs 40 as want to keep them under 4 hours long (or will fail
# on regular queue at BUT).
steps/align_lda_mllt_sat.sh --num-jobs 40 --cmd "$train_cmd" \
data/train_si284 data/lang exp/tri4b exp/tri4b_ali_si284
steps/make_denlats_lda_etc.sh --num-jobs 30 --cmd "$train_cmd" \
steps/make_denlats_lda_etc.sh --num-jobs 40 --cmd "$train_cmd" \
data/train_si284 data/lang exp/tri4b_ali_si284 exp/tri4b_denlats_si284
steps/train_lda_etc_mmi.sh --num-jobs 30 --cmd "$train_cmd" \
steps/train_lda_etc_mmi.sh --num-jobs 40 --cmd "$train_cmd" \
data/train_si284 data/lang exp/tri4b_ali_si284 exp/tri4b_denlats_si284 exp/tri4b exp/tri4b_mmi
scripts/decode.sh --cmd "$decode_cmd" steps/decode_lda_etc.sh exp/tri4b/graph_tgpr data/test_dev93 exp/tri4b_mmi/decode_tgpr_dev93 exp/tri4b/decode_tgpr_dev93
steps/train_lda_etc_mmi.sh --boost 0.1 --num-jobs 30 --cmd "$train_cmd" \
steps/train_lda_etc_mmi.sh --boost 0.1 --num-jobs 40 --cmd "$train_cmd" \
data/train_si284 data/lang exp/tri4b_ali_si284 exp/tri4b_denlats_si284 exp/tri4b exp/tri4b_mmi_b0.1
scripts/decode.sh --cmd "$decode_cmd" steps/decode_lda_etc.sh exp/tri4b/graph_tgpr data/test_dev93 exp/tri4b_mmi_b0.1/decode_tgpr_dev93 exp/tri4b/decode_tgpr_dev93
......
#!/usr/bin/perl
use File::Basename;
use Cwd;
use Time::HiRes qw (usleep);
# queue.pl has the same functionality as run.pl, except that
# it runs the job in question on the queue.
......@@ -73,7 +74,7 @@ $shfile = "$dir/$base";
open(S, ">$shfile") || die "Could not write to script file $shfile";
`chmod +x $shfile`;
$qsub_cmd = "qrsh -j y -now no -o $logfile $qsub_opts $shfile";
$qsub_cmd = "qrsh -now no $qsub_opts $shfile";
#
# Write to the script file, and close it.
#
......@@ -90,18 +91,42 @@ print S "## submitted with:\n";
print S "# $qsub_cmd\n";
close(S) || die "Could not close script file $shfile";
$num_tries = 2; # Unless we fail with exit status 0 (i.e. the job returns with
# exit status 0 but there is no "finished" message at the
# end of the log-file), this is how many tries we do. But
# if we get this nasty exit-status-0 thing, which seems to be
# unpredictable and relates somehow to the queue system,
# we'll try more times and will put delays in.
$max_tries = 10;
$delay = 1; # seconds. We'll increase this.
$increment = 30; # increase delay by 30 secs each time.
for ($try = 1; ; $try++) {
#
# Try to run the script file, on the queue.
#
system "$qsub_cmd";
if ($? == 0) { exit(0); }
system "$qsub_cmd";
print STDERR "Command writing to $logfile failed; trying again\n";
system "mv $logfile $logfile.bak";
system "$qsub_cmd";
if ($? == 0) {
exit(0);
} else {
print STDERR "Command writing to $logfile failed second time. Command is in $shfile\n";
# Since we moved from qsub -sync y to qrsh (to work around a bug in
# GridEngine), we have had jobs fail yet return zero exit status.
# The "tail -1 $logfile" below is to try to catch this.
$ret = $?;
if ($ret == 0) { ## Check it's really successful: log-file should say "Finished" at end...
# but sleep first, for 0.1 seconds; need to wait for file system to sync.
usleep(100000);
if(`tail -1 $logfile` =~ m/Finished/) { exit(0); }
usleep(500000); # wait another half second and try again, in case file system is syncing slower than that.
if(`tail -1 $logfile` =~ m/Finished/) { exit(0); }
sleep(1); # now a full second.
if(`tail -1 $logfile` =~ m/Finished/) { exit(0); }
}
if ($try < $num_tries || ($ret == 0 && $try < $max_tries)) {
print STDERR "Command writing to $logfile failed with exit status $ret [on try $try]; waiting $delay seconds and trying again\n";
sleep($delay);
$delay += $increment;
system "mv $logfile $logfile.bak";
} else {
print STDERR "Command writing to $logfile failed after $try tries. Command is in $shfile\n";
exit(1);
}
}
......@@ -108,7 +108,7 @@ rm $dir/.error 2>/dev/null
echo "Aligning data from $data (with alignment model)"
for n in `get_splits.pl $nj`; do
sifeatspart[$n]="ark:apply-cmvn --norm-vars=false --utt2spk=ark:$data/utt2spk ark:$dir/$n.cmvn scp:$data/split$nj/$n/feats.scp ark:- | splice-feats ark:- ark:- | transform-feats $dir/final.mat ark:- ark:- |"
sifeatspart[$n]="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$data/utt2spk ark:$dir/$n.cmvn scp:$data/split$nj/$n/feats.scp ark:- | splice-feats ark:- ark:- | transform-feats $dir/final.mat ark:- ark:- |"
featspart[$n]="${sifeatspart[$n]} transform-feats --utt2spk=ark:$data/split$nj/$n/utt2spk ark:$dir/$n.trans ark:- ark:- |"
done
......@@ -129,7 +129,7 @@ for n in `get_splits.pl $nj`; do
weight-silence-post 0.0 $silphonelist $dir/final.alimdl ark:- ark:- \| \
gmm-post-to-gpost $dir/final.alimdl "${sifeatspart[$n]}" ark:- ark:- \| \
gmm-est-fmllr-gpost --spk2utt=ark:$data/split$nj/$n/spk2utt $dir/final.mdl "${sifeatspart[$n]}" \
ark:- ark:$dir/$n.trans || touch $dir/.error &
ark,s,cs:- ark:$dir/$n.trans || touch $dir/.error &
done
wait;
[ -f $dir/.error ] && echo Error computing fMLLR transforms && exit 1;
......
......@@ -75,7 +75,7 @@ fi
basefeats="ark:apply-cmvn --norm-vars=false --utt2spk=ark:$data/utt2spk \"ark:cat $alidir/*.cmvn|\" scp:$data/feats.scp ark:- | splice-feats ark:- ark:- | transform-feats $dir/lda.mat ark:- ark:- |"
for n in `get_splits.pl $nj`; do
splicedfeatspart[$n]="ark:apply-cmvn --norm-vars=false --utt2spk=ark:$data/split$nj/$n/utt2spk ark:$alidir/$n.cmvn scp:$data/split$nj/$n/feats.scp ark:- | splice-feats ark:- ark:- |"
splicedfeatspart[$n]="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$data/split$nj/$n/utt2spk ark:$alidir/$n.cmvn scp:$data/split$nj/$n/feats.scp ark:- | splice-feats ark:- ark:- |"
basefeatspart[$n]="${splicedfeatspart[$n]} transform-feats $dir/lda.mat ark:- ark:- |"
featspart[$n]="${basefeatspart[$n]}" # This gets overwritten later in the script.
done
......
......@@ -79,11 +79,11 @@ fi
cp $alidir/final.mat $dir/
sifeats="ark:apply-cmvn --norm-vars=false --utt2spk=ark:$data/utt2spk \"ark:cat $alidir/*.cmvn|\" scp:$data/feats.scp ark:- | splice-feats ark:- ark:- | transform-feats $dir/final.mat ark:- ark:- |"
sifeats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$data/utt2spk \"ark:cat $alidir/*.cmvn|\" scp:$data/feats.scp ark:- | splice-feats ark:- ark:- | transform-feats $dir/final.mat ark:- ark:- |"
# featspart[n] gets overwritten later in the script.
for n in `get_splits.pl $nj`; do
sifeatspart[$n]="ark:apply-cmvn --norm-vars=false --utt2spk=ark:$data/split$nj/$n/utt2spk ark:$alidir/$n.cmvn scp:$data/split$nj/$n/feats.scp ark:- | splice-feats ark:- ark:- | transform-feats $dir/final.mat ark:- ark:- |"
sifeatspart[$n]="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$data/split$nj/$n/utt2spk ark:$alidir/$n.cmvn scp:$data/split$nj/$n/feats.scp ark:- | splice-feats ark:- ark:- | transform-feats $dir/final.mat ark:- ark:- |"
done
n=`get_splits.pl $nj | awk '{print $1}'`
......
......@@ -75,9 +75,9 @@ if [ ! -d $data/split$nj -o $data/split$nj -ot $data/feats.scp ]; then
fi
for n in `get_splits.pl $nj`; do
sifeatspart[$n]="ark:apply-cmvn --norm-vars=false --utt2spk=ark:$data/split$nj/$n/utt2spk ark:$alidir/$n.cmvn scp:$data/split$nj/$n/feats.scp ark:- | splice-feats ark:- ark:- | transform-feats $dir/final.mat ark:- ark:- |"
sifeatspart[$n]="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$data/split$nj/$n/utt2spk ark:$alidir/$n.cmvn scp:$data/split$nj/$n/feats.scp ark:- | splice-feats ark:- ark:- | transform-feats $dir/final.mat ark:- ark:- |"
done
sifeats="ark:apply-cmvn --norm-vars=false --utt2spk=ark:$data/utt2spk 'ark:cat $alidir/*.cmvn|' scp:$data/feats.scp ark:- | splice-feats ark:- ark:- | transform-feats $dir/final.mat ark:- ark:- |"
sifeats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$data/utt2spk 'ark:cat $alidir/*.cmvn|' scp:$data/feats.scp ark:- | splice-feats ark:- ark:- | transform-feats $dir/final.mat ark:- ark:- |"
# Initial transforms... either find them, or create them.
n=`get_splits.pl $nj | awk '{print $1}'`
......
......@@ -235,23 +235,13 @@ void AmDiagGmm::Write(std::ostream &out_stream, bool binary) const {
}
}
// Registers every UBM-clustering configuration value with the option parser.
// Each help string is prefixed with the struct name so the generated usage
// message makes clear which options group a flag belongs to.
void UbmClusteringOptions::Register(ParseOptions *po) {
  const std::string prefix = "UbmClusteringOptions: ";
  po->Register("ubm-numcomps", &ubm_numcomps,
               prefix + "Number of Gaussians components in the final UBM.");
  po->Register("reduce-state-factor", &reduce_state_factor,
               prefix + "Intermediate number of clustered states (as fraction of total states).");
  po->Register("intermediate-numcomps", &intermediate_numcomps,
               prefix + "Intermediate number of merged Gaussian components.");
  po->Register("cluster-varfloor", &cluster_varfloor,
               prefix + "Variance floor used in bottom-up state clustering.");
}
void UbmClusteringOptions::Check() {
if (ubm_numcomps > intermediate_numcomps)
KALDI_ERR << "Invalid parameters: --ubm-numcomps=" << ubm_numcomps
<< " > --intermediate-numcomps=" << intermediate_numcomps;
if (ubm_numcomps > max_am_gauss)
KALDI_ERR << "Invalid parameters: --ubm-numcomps=" << ubm_numcomps
<< " > --max-am-gauss=" << max_am_gauss;
if (ubm_numcomps <= 0)
KALDI_ERR << "Invalid parameters: --ubm-numcomps=" << ubm_numcomps;
if (cluster_varfloor <= 0)
......@@ -266,6 +256,24 @@ void ClusterGaussiansToUbm(const AmDiagGmm& am,
const Vector<BaseFloat> &state_occs,
const UbmClusteringOptions &opts,
DiagGmm *ubm_out) {
if (am.NumGauss() > opts.max_am_gauss) {
KALDI_LOG << "ClusterGaussiansToUbm: first reducing num-gauss from " << am.NumGauss()
<< " to " << opts.max_am_gauss;
AmDiagGmm tmp_am;
tmp_am.CopyFromAmDiagGmm(am);
BaseFloat power = 1.0, min_count = 1.0; // Make the power 1, which I feel
// is appropriate to the way we're doing the overall clustering procedure.
tmp_am.MergeByCount(state_occs, opts.max_am_gauss, power, min_count);
UbmClusteringOptions opts_tmp(opts);
if (tmp_am.NumGauss() > opts.max_am_gauss) {
KALDI_LOG << "Clustered down to " << tmp_am.NumGauss()
<< "; will not cluster further";
opts_tmp.max_am_gauss = tmp_am.NumGauss();
}
ClusterGaussiansToUbm(tmp_am, state_occs, opts_tmp, ubm_out);
return;
}
int32 num_pdfs = static_cast<int32>(am.NumPdfs()),
dim = am.Dim(),
num_clust_states = static_cast<int32>(opts.reduce_state_factor*num_pdfs);
......@@ -332,51 +340,6 @@ void ClusterGaussiansToUbm(const AmDiagGmm& am,
for (int32 clust_index = 0; clust_index < num_clust_states; ++clust_index)
DeletePointers(&state_clust_gauss[clust_index]);
// // Put the remaining Gaussians together for a final bottom-up clustering.
// KALDI_VLOG(1) << "Putting " << opts.intermediate_numcomps << " Gaussians "
// << "together for a final bottom-up clustering.";
// vector<Clusterable*> gauss_cluster_in;
// gauss_cluster_in.reserve(opts.intermediate_numcomps);
// for (int32 clust_index = 0; clust_index < num_clust_states; ++clust_index) {
// for (int32 i = gauss_clusters_out[clust_index].size()-1; i >=0; --i) {
// GaussClusterable *this_gauss = static_cast<GaussClusterable*>(
// gauss_clusters_out[clust_index][i]);
// gauss_cluster_in.push_back(this_gauss);
// }
// }
// vector<Clusterable*> final_clusters;
// ClusterBottomUp(gauss_cluster_in, kBaseFloatMax, opts.ubm_numcomps,
// &final_clusters, /*get the clustered Gaussians*/
// &state_clusters /*cluster assignments not needed*/);
// DeletePointers(&gauss_cluster_in);
//
// KALDI_LOG << "Clustered " << am.NumGauss() << " Gaussians in the model to "
// << opts.ubm_numcomps << ". Copying components to UBM.";
// Matrix<BaseFloat> tmp_means(opts.ubm_numcomps, dim);
// Matrix<BaseFloat> tmp_vars(opts.ubm_numcomps, dim);
// Vector<BaseFloat> tmp_weights(opts.ubm_numcomps);
// Vector<BaseFloat> tmp_vec(dim);
// int32 gauss_index = 0;
// for (int32 i = final_clusters.size()-1; i >=0; --i) {
// GaussClusterable *this_gauss = static_cast<GaussClusterable*>(
// final_clusters[i]);
// BaseFloat weight = this_gauss->count();
// tmp_weights(gauss_index) = weight;
// tmp_vec.CopyFromVec(this_gauss->x_stats());
// tmp_vec.Scale(1/weight);
// tmp_means.CopyRowFromVec(tmp_vec, gauss_index);
// tmp_vec.CopyFromVec(this_gauss->x2_stats());
// tmp_vec.Scale(1/weight);
// tmp_vec.AddVec2(-1.0, tmp_means.Row(gauss_index)); // x^2 stats to var.
// tmp_vars.CopyRowFromVec(tmp_vec, gauss_index);
// gauss_index++;
// }
// tmp_gmm.Resize(opts.ubm_numcomps, dim);
// tmp_gmm.SetWeights(tmp_weights);
// tmp_gmm.SetInvVarsAndMeans(tmp_vars, tmp_means);
// tmp_gmm.ComputeGconsts();
// Next, put the remaining clustered Gaussians into a single GMM.
KALDI_VLOG(1) << "Putting " << opts.intermediate_numcomps << " Gaussians "
<< "into a single GMM for final merge step.";
......
......@@ -166,15 +166,30 @@ struct UbmClusteringOptions {
BaseFloat reduce_state_factor;
int32 intermediate_numcomps;
BaseFloat cluster_varfloor;
int32 max_am_gauss;
UbmClusteringOptions()
: ubm_numcomps(400), reduce_state_factor(0.2),
intermediate_numcomps(4000), cluster_varfloor(0.01) {}
intermediate_numcomps(4000), cluster_varfloor(0.01),
max_am_gauss(20000) {}
// Constructs options with explicit clustering parameters.
//   ncomp:        number of Gaussian components in the final UBM.
//   red:          fraction of states kept by the intermediate state clustering.
//   interm_comps: intermediate number of merged Gaussian components.
//   vfloor:       variance floor used in bottom-up clustering.
// Fix: this constructor previously left max_am_gauss uninitialized (only the
// default constructor set it), so Check()/clustering code could read an
// indeterminate value.  Initialize it to the same default (20000) as the
// default constructor.
UbmClusteringOptions(int32 ncomp, BaseFloat red, int32 interm_comps,
                     BaseFloat vfloor)
    : ubm_numcomps(ncomp), reduce_state_factor(red),
      intermediate_numcomps(interm_comps), cluster_varfloor(vfloor),
      max_am_gauss(20000) {}
void Register(ParseOptions *po);
// Registers the UBM-clustering options on the supplied parser.  The help
// text of every flag carries the "UbmClusteringOptions: " tag so users can
// tell which options struct a given command-line flag comes from.
void Register(ParseOptions *po) {
  const char *tag = "UbmClusteringOptions: ";
  po->Register("max-am-gauss", &max_am_gauss, std::string(tag) +
               "We first reduce acoustic model to this max #Gauss before clustering.");
  po->Register("ubm-numcomps", &ubm_numcomps, std::string(tag) +
               "Number of Gaussians components in the final UBM.");
  po->Register("reduce-state-factor", &reduce_state_factor, std::string(tag) +
               "Intermediate number of clustered states (as fraction of total states).");
  po->Register("intermediate-numcomps", &intermediate_numcomps, std::string(tag) +
               "Intermediate number of merged Gaussian components.");
  po->Register("cluster-varfloor", &cluster_varfloor, std::string(tag) +
               "Variance floor used in bottom-up state clustering.");
}
void Check();
};
......
......@@ -180,9 +180,9 @@ void DiagGmm::Merge(int32 target_components, std::vector<int32> *history) {
<< target_components << "), #Gauss = " << NumGauss();
}
if (NumGauss() == target_components) {
KALDI_WARN << "No components merged, as target (" << target_components
KALDI_VLOG(2) << "No components merged, as target (" << target_components
<< ") = total.";
return;
return; // Nothing to do.
}
int32 num_comp = NumGauss(), dim = Dim();
......
......@@ -25,6 +25,9 @@ using std::vector;
namespace kaldi {
typedef uint16 uint_smaller;
typedef int16 int_smaller;
// ============================================================================
// Some convenience functions used in the clustering routines
// ============================================================================
......@@ -232,7 +235,7 @@ class BottomUpClusterer {
std::vector<BaseFloat> dist_vec_;
int32 nclusters_;
int32 npoints_;
typedef std::pair<BaseFloat, std::pair<uint16, uint16> > QueueElement;
typedef std::pair<BaseFloat, std::pair<uint_smaller, uint_smaller> > QueueElement;
// Priority queue using greater (lowest distances are highest priority).
typedef std::priority_queue<QueueElement, std::vector<QueueElement>,
std::greater<QueueElement> > QueueType;
......@@ -244,7 +247,7 @@ BaseFloat BottomUpClusterer::Cluster() {
SetInitialDistances();
while (nclusters_ > min_clust_ && !queue_.empty()) {
std::pair<BaseFloat, std::pair<uint16, uint16> > pr = queue_.top();
std::pair<BaseFloat, std::pair<uint_smaller, uint_smaller> > pr = queue_.top();
BaseFloat dist = pr.first;
int32 i = (int32) pr.second.first, j = (int32) pr.second.second;
queue_.pop();
......@@ -264,7 +267,7 @@ void BottomUpClusterer::Renumber() {
// called after clustering, renumbers to make clusters contiguously
// numbered. also processes assignments_ to remove chains of references.
std::vector<uint16> mapping(npoints_, static_cast<uint16> (-1)); // mapping from intermediate to final clusters.
std::vector<uint_smaller> mapping(npoints_, static_cast<uint_smaller> (-1)); // mapping from intermediate to final clusters.
std::vector<Clusterable*> new_clusters(nclusters_);
int32 clust = 0;
std::vector<Clusterable*>::const_iterator iter = clusters_->begin(), end =
......@@ -284,7 +287,7 @@ void BottomUpClusterer::Renumber() {
while ((*assignments_)[ii] != ii)
ii = (*assignments_)[ii]; // follow the chain.
assert((*clusters_)[ii] != NULL); // cannot have assignment to nonexistent cluster.
assert(mapping[ii] != static_cast<uint16>(-1));
assert(mapping[ii] != static_cast<uint_smaller>(-1));
new_assignments[i] = mapping[ii];
}
clusters_->swap(new_clusters);
......@@ -306,8 +309,8 @@ void BottomUpClusterer::SetInitialDistances() {
BaseFloat dist = (*clusters_)[i]->Distance(*((*clusters_)[j]));
dist_vec_[(i * (i - 1)) / 2 + j] = dist;
if (dist <= max_merge_thresh_)
queue_.push(std::make_pair(dist, std::make_pair(static_cast<uint16>(i),
static_cast<uint16>(j))));
queue_.push(std::make_pair(dist, std::make_pair(static_cast<uint_smaller>(i),
static_cast<uint_smaller>(j))));
}
}
}
......@@ -356,7 +359,7 @@ void BottomUpClusterer::ReconstructQueue() {
BaseFloat dist = dist_vec_[(i * (i - 1)) / 2 + j];
if (dist <= max_merge_thresh_) {
queue_.push(std::make_pair(dist, std::make_pair(
static_cast<uint16>(i), static_cast<uint16>(j))));
static_cast<uint_smaller>(i), static_cast<uint_smaller>(j))));
}
}
}
......@@ -370,8 +373,8 @@ void BottomUpClusterer::SetDistance(int32 i, int32 j) {
BaseFloat dist = (*clusters_)[i]->Distance(*((*clusters_)[j]));
dist_vec_[(i * (i - 1)) / 2 + j] = dist; // set the distance in the array.
if (dist < max_merge_thresh_) {
queue_.push(std::make_pair(dist, std::make_pair(static_cast<uint16>(i),
static_cast<uint16>(j))));
queue_.push(std::make_pair(dist, std::make_pair(static_cast<uint_smaller>(i),
static_cast<uint_smaller>(j))));
}
// every time it's at least twice the maximum possible size.
if (queue_.size() >= static_cast<size_t> (npoints_ * npoints_)) {
......@@ -390,8 +393,9 @@ BaseFloat ClusterBottomUp(const std::vector<Clusterable*> &points,
KALDI_ASSERT(max_merge_thresh >= 0.0 && min_clust >= 0);
KALDI_ASSERT(!ContainsNullPointers(points));
int32 npoints = points.size();
// make sure fits in uint16 and does not hit the -1 which is reserved.
KALDI_ASSERT(npoints < static_cast<int32>(static_cast<uint16>(-1)));
// make sure fits in uint_smaller and does not hit the -1 which is reserved.
KALDI_ASSERT(sizeof(uint_smaller)==sizeof(uint32) ||
npoints < static_cast<int32>(static_cast<uint_smaller>(-1)));
BottomUpClusterer bc(points, max_merge_thresh, min_clust, clusters_out, assignments_out);
BaseFloat ans = bc.Cluster();
......@@ -506,7 +510,7 @@ void CompartmentalizedBottomUpClusterer::Renumber(int32 comp) {
KALDI_ASSERT(clusts_in_compartment <= nclusters_);
// mapping from intermediate to final clusters.
vector<uint16> mapping(npoints_[comp], static_cast<uint16> (-1));
vector<uint_smaller> mapping(npoints_[comp], static_cast<uint_smaller> (-1));
vector<Clusterable*> new_clusters(clusts_in_compartment);
// Now copy the surviving clusters in a fresh array.
......@@ -527,7 +531,7 @@ void CompartmentalizedBottomUpClusterer::Renumber(int32 comp) {
ii = assignments_[comp][ii]; // follow the chain.
// cannot assign to nonexistent cluster.
KALDI_ASSERT(clusters_[comp][ii] != NULL);
KALDI_ASSERT(mapping[ii] != static_cast<uint16>(-1));
KALDI_ASSERT(mapping[ii] != static_cast<uint_smaller>(-1));
new_assignments[i] = mapping[ii];
}
clusters_[comp].swap(new_clusters);
......@@ -619,8 +623,8 @@ void CompartmentalizedBottomUpClusterer::SetDistance(int32 comp,
BaseFloat dist = clusters_[comp][i]->Distance(*(clusters_[comp][j]));
dist_vec_[comp][(i * (i - 1)) / 2 + j] = dist;
if (dist < max_merge_thresh_) {
queue_.push(CompBotClustElem(dist, comp, static_cast<uint16>(i),
static_cast<uint16>(j)));
queue_.push(CompBotClustElem(dist, comp, static_cast<uint_smaller>(i),
static_cast<uint_smaller>(j)));
}
}
......@@ -637,8 +641,9 @@ BaseFloat ClusterBottomUpCompartmentalized(
KALDI_ASSERT(!ContainsNullPointers(*itr));
npoints += itr->size();
}
// make sure fits in uint16 and does not hit the -1 which is reserved.
KALDI_ASSERT(npoints < static_cast<int32>(static_cast<uint16>(-1)));
// make sure fits in uint_smaller and does not hit the -1 which is reserved.
KALDI_ASSERT(sizeof(uint_smaller)==sizeof(uint32) ||
npoints < static_cast<int32>(static_cast<uint_smaller>(-1)));
CompartmentalizedBottomUpClusterer bc(points, thresh, min_clust);
BaseFloat ans = bc.Cluster(clusters_out, assignments_out);
......@@ -663,7 +668,7 @@ class RefineClusterer {
// to just make it int32). Also used as a time-id (cannot have more moves of
// points, than can fit in this time). Must be big enough to store num-clust.
typedef int32 LocalInt;
typedef uint16 ClustIndexInt;
typedef uint_smaller ClustIndexInt;
RefineClusterer(const std::vector<Clusterable*> &points,
std::vector<Clusterable*> *clusters,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment