Commit ea9909ce authored by Dan Povey's avatar Dan Povey
Browse files

trunk: Replace download server merlin, where used, with www.danielpovey.com...

trunk: Replace download server merlin, where used, with www.danielpovey.com (files on merlin disappeared).

git-svn-id: https://svn.code.sf.net/p/kaldi/code/trunk@3246 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
parent 3ddfe724
......@@ -47,7 +47,7 @@ export PATH=$PATH:/export/babel/sanjeev/kaldi-trunk/tools/kaldi_lm
else
echo Downloading and installing the kaldi_lm tools
if [ ! -f kaldi_lm.tar.gz ]; then
wget http://merlin.fit.vutbr.cz/kaldi/kaldi_lm.tar.gz || exit 1;
wget http://www.danielpovey.com/files/kaldi/kaldi_lm.tar.gz || exit 1;
fi
tar -xvzf kaldi_lm.tar.gz || exit 1;
cd kaldi_lm
......
......@@ -20,7 +20,7 @@ export PATH=$KALDI_ROOT/tools/kaldi_lm:$PATH
else
echo Downloading and installing the kaldi_lm tools
if [ ! -f kaldi_lm.tar.gz ]; then
wget http://merlin.fit.vutbr.cz/kaldi/kaldi_lm.tar.gz || exit 1;
wget http://www.danielpovey.com/files/kaldi/kaldi_lm.tar.gz || exit 1;
fi
tar -xvzf kaldi_lm.tar.gz || exit 1;
cd kaldi_lm
......
......@@ -29,7 +29,7 @@ export PATH=$PATH:`pwd`/../../../tools/kaldi_lm
else
echo Downloading and installing the kaldi_lm tools
if [ ! -f kaldi_lm.tar.gz ]; then
wget http://merlin.fit.vutbr.cz/kaldi/kaldi_lm.tar.gz || exit 1;
wget http://www.danielpovey.com/files/kaldi/kaldi_lm.tar.gz || exit 1;
fi
tar -xvzf kaldi_lm.tar.gz || exit 1;
cd kaldi_lm
......
......@@ -28,7 +28,7 @@ export PATH=$PATH:`pwd`/../../../tools/kaldi_lm
else
echo Downloading and installing the kaldi_lm tools
if [ ! -f kaldi_lm.tar.gz ]; then
wget http://merlin.fit.vutbr.cz/kaldi/kaldi_lm.tar.gz || exit 1;
wget http://www.danielpovey.com/files/kaldi/kaldi_lm.tar.gz || exit 1;
fi
tar -xvzf kaldi_lm.tar.gz || exit 1;
cd kaldi_lm
......
......@@ -20,7 +20,7 @@ export PATH=$PATH:`pwd`/../../../tools/kaldi_lm
else
echo Downloading and installing the kaldi_lm tools
if [ ! -f kaldi_lm.tar.gz ]; then
wget http://merlin.fit.vutbr.cz/kaldi/kaldi_lm.tar.gz || exit 1;
wget http://www.danielpovey.com/files/kaldi/kaldi_lm.tar.gz || exit 1;
fi
tar -xvzf kaldi_lm.tar.gz || exit 1;
cd kaldi_lm
......
......@@ -28,7 +28,7 @@ export PATH=$PATH:`pwd`/../../../tools/kaldi_lm
else
echo Downloading and installing the kaldi_lm tools
if [ ! -f kaldi_lm.tar.gz ]; then
wget http://merlin.fit.vutbr.cz/kaldi/kaldi_lm.tar.gz || exit 1;
wget http://www.danielpovey.com/files/kaldi/kaldi_lm.tar.gz || exit 1;
fi
tar -xvzf kaldi_lm.tar.gz || exit 1;
cd kaldi_lm
......
......@@ -53,7 +53,7 @@ export PATH=$PATH:`pwd`/../../../tools/kaldi_lm
else
echo Downloading and installing the kaldi_lm tools
if [ ! -f kaldi_lm.tar.gz ]; then
wget http://merlin.fit.vutbr.cz/kaldi/kaldi_lm.tar.gz || exit 1;
wget http://www.danielpovey.com/files/kaldi/kaldi_lm.tar.gz || exit 1;
fi
tar -xvzf kaldi_lm.tar.gz || exit 1;
cd kaldi_lm
......@@ -87,7 +87,7 @@ cat $trans_file | awk -v wmap=$dir/word_map 'BEGIN{while((getline<wmap)>0)map[$1
! merge_ngrams </dev/null >&/dev/null && \
echo merge_ngrams not found in kaldi_lm. You need to have kaldi_lm on your path OR && \
echo You can do the following: && \
echo 1. Install the latest version from http://merlin.fit.vutbr.cz/kaldi/kaldi_lm.tar.gz && \
echo 1. Install the latest version from http://www.danielpovey.com/files/kaldi/kaldi_lm.tar.gz && \
echo 2. you delete kaldi_lm, and kaldi_lm.tar.gz in the tools folder. This script will automatically install it. && \
exit 1;
......
......@@ -20,7 +20,7 @@ export PATH=$PATH:`pwd`/../../../tools/kaldi_lm
else
echo Downloading and installing the kaldi_lm tools
if [ ! -f kaldi_lm.tar.gz ]; then
wget http://merlin.fit.vutbr.cz/kaldi/kaldi_lm.tar.gz || exit 1;
wget http://www.danielpovey.com/files/kaldi/kaldi_lm.tar.gz || exit 1;
fi
tar -xvzf kaldi_lm.tar.gz || exit 1;
cd kaldi_lm
......
......@@ -20,7 +20,7 @@ export PATH=$PATH:`pwd`/../../../tools/kaldi_lm
else
echo Downloading and installing the kaldi_lm tools
if [ ! -f kaldi_lm.tar.gz ]; then
wget http://merlin.fit.vutbr.cz/kaldi/kaldi_lm.tar.gz || exit 1;
wget http://www.danielpovey.com/files/kaldi/kaldi_lm.tar.gz || exit 1;
fi
tar -xvzf kaldi_lm.tar.gz || exit 1;
cd kaldi_lm
......
......@@ -20,7 +20,7 @@ export PATH=$KALDI_ROOT/tools/kaldi_lm:$PATH
else
echo Downloading and installing the kaldi_lm tools
if [ ! -f kaldi_lm.tar.gz ]; then
wget http://merlin.fit.vutbr.cz/kaldi/kaldi_lm.tar.gz || exit 1;
wget http://www.danielpovey.com/files/kaldi/kaldi_lm.tar.gz || exit 1;
fi
tar -xvzf kaldi_lm.tar.gz || exit 1;
cd kaldi_lm
......
......@@ -58,7 +58,9 @@ int main(int argc, char *argv[]) {
po.Register("thresh", &thresh, "Log-likelihood change threshold for "
"tree-building");
po.Register("cluster-thresh", &cluster_thresh, "Log-likelihood change "
"threshold for clustering after tree-building");
"threshold for clustering after tree-building. 0 means "
"no clustering; -1 means use as a clustering threshold the "
"likelihood change of the final split.");
po.Read(argc, argv);
......
......@@ -528,9 +528,9 @@ void DiagGmm::LogLikelihoods(const VectorBase<BaseFloat> &data,
Vector<BaseFloat> *loglikes) const {
loglikes->Resize(gconsts_.Dim(), kUndefined);
loglikes->CopyFromVec(gconsts_);
if (static_cast<int32>(data.Dim()) != Dim()) {
if (data.Dim() != Dim()) {
KALDI_ERR << "DiagGmm::ComponentLogLikelihood, dimension "
<< "mismatch " << (data.Dim()) << " vs. "<< (Dim());
<< "mismatch " << data.Dim() << " vs. "<< Dim();
}
Vector<BaseFloat> data_sq(data);
data_sq.ApplyPow(2.0);
......@@ -542,6 +542,26 @@ void DiagGmm::LogLikelihoods(const VectorBase<BaseFloat> &data,
}
/// Computes the per-component log-likelihoods for a sequence of frames at
/// once.  Each row of "data" is one frame; at exit "loglikes" has
/// data.NumRows() rows and gconsts_.Dim() columns, the row index being the
/// frame index.  "data" must have at least one row, and its number of
/// columns must equal Dim(); otherwise an error is raised.
void DiagGmm::LogLikelihoods(const MatrixBase<BaseFloat> &data,
                             Matrix<BaseFloat> *loglikes) const {
  KALDI_ASSERT(data.NumRows() != 0);
  // Validate the feature dimension before touching the output, so that a
  // mismatch is reported without first allocating and filling "loglikes".
  if (data.NumCols() != Dim()) {
    KALDI_ERR << "DiagGmm::LogLikelihoods, dimension "
              << "mismatch " << data.NumCols() << " vs. "<< Dim();
  }
  // Every row (frame) starts out as a copy of gconsts_.
  loglikes->Resize(data.NumRows(), gconsts_.Dim(), kUndefined);
  loglikes->CopyRowsFromVec(gconsts_);

  // Element-wise square of the features, needed for the quadratic term.
  Matrix<BaseFloat> data_sq(data);
  data_sq.ApplyPow(2.0);

  // loglikes += means * inv(vars) * data.
  loglikes->AddMatMat(1.0, data, kNoTrans, means_invvars_, kTrans, 1.0);
  // loglikes += -0.5 * inv(vars) * data_sq.
  loglikes->AddMatMat(-0.5, data_sq, kNoTrans, inv_vars_, kTrans, 1.0);
}
void DiagGmm::LogLikelihoodsPreselect(const VectorBase<BaseFloat> &data,
const std::vector<int32> &indices,
Vector<BaseFloat> *loglikes) const {
......@@ -777,6 +797,79 @@ BaseFloat DiagGmm::GaussianSelection(const VectorBase<BaseFloat> &data,
return tot_loglike;
}
/// Gaussian selection for a sequence of frames (one frame per row of "data").
/// For each frame t, (*output)[t] receives the indices of at most
/// "num_gselect" Gaussians, sorted from best to worst log-likelihood.
/// Returns the sum over all frames of the log-sum of the selected
/// log-likelihoods.  "data" must have at least one row and "num_gselect"
/// must be positive.
BaseFloat DiagGmm::GaussianSelection(const MatrixBase<BaseFloat> &data,
                                     int32 num_gselect,
                                     std::vector<std::vector<int32> > *output) const {
  // A non-positive num_gselect would index past the end of the per-frame
  // score buffer below (ptr + num_gauss - num_gselect), so reject it up front.
  KALDI_ASSERT(num_gselect > 0);

  double ans = 0.0;
  int32 num_frames = data.NumRows(), num_gauss = NumGauss();

  // Don't devote more than 10Mb to loglikes_mat; break up the utterance if
  // needed.  The byte count is computed in size_t because the product
  // num_frames * num_gauss * sizeof(BaseFloat) can overflow int32.
  const size_t max_mem = 10000000;
  const size_t mem_needed = static_cast<size_t>(num_frames) *
      static_cast<size_t>(num_gauss) * sizeof(BaseFloat);

  // Only split when there is more than one frame: a single frame cannot be
  // made any smaller, and recursing on it would never terminate.
  if (num_frames > 1 && mem_needed > max_mem) {
    // Break into parts and recurse, we don't want to consume too
    // much memory.
    const size_t num_parts = (mem_needed + max_mem - 1) / max_mem;
    // Frames per part (rounded up); always >= 1 because num_frames > 1 here.
    const int32 part_frames = static_cast<int32>(
        (static_cast<size_t>(num_frames) + num_parts - 1) / num_parts);
    double tot_ans = 0.0;
    std::vector<std::vector<int32> > part_output;
    output->clear();
    output->resize(num_frames);
    // Step by start frame rather than by part index: because part_frames is
    // rounded up, fewer than num_parts parts may be needed to cover the data,
    // and iterating over all num_parts indices could produce empty or
    // negative-sized parts.
    for (int32 start_frame = 0; start_frame < num_frames;
         start_frame += part_frames) {
      const int32 this_num_frames =
          std::min(num_frames - start_frame, part_frames);
      SubMatrix<BaseFloat> data_part(data, start_frame, this_num_frames,
                                     0, data.NumCols());
      tot_ans += GaussianSelection(data_part, num_gselect, &part_output);
      // Move each frame's result into its place in the full output.
      for (int32 t = 0; t < this_num_frames; t++)
        (*output)[start_frame + t].swap(part_output[t]);
    }
    KALDI_ASSERT(!output->back().empty());
    return tot_ans;
  }

  KALDI_ASSERT(num_frames != 0);
  Matrix<BaseFloat> loglikes_mat(num_frames, num_gauss, kUndefined);
  this->LogLikelihoods(data, &loglikes_mat);

  output->clear();
  output->resize(num_frames);

  for (int32 i = 0; i < num_frames; i++) {
    SubVector<BaseFloat> loglikes(loglikes_mat, i);

    // "thresh" is the num_gselect'th largest score of this frame (or -inf if
    // all Gaussians are to be kept); anything below it cannot be selected.
    BaseFloat thresh;
    if (num_gselect < num_gauss) {
      // nth_element reorders its input, so work on a copy.
      Vector<BaseFloat> loglikes_copy(loglikes);
      BaseFloat *ptr = loglikes_copy.Data();
      std::nth_element(ptr, ptr+num_gauss-num_gselect, ptr+num_gauss);
      thresh = ptr[num_gauss-num_gselect];
    } else {
      thresh = -std::numeric_limits<BaseFloat>::infinity();
    }

    BaseFloat tot_loglike = -std::numeric_limits<BaseFloat>::infinity();
    // Collect (score, index) pairs for all Gaussians at or above the
    // threshold; ties at the threshold can yield more than num_gselect
    // candidates, which the loop below truncates.
    std::vector<std::pair<BaseFloat, int32> > pairs;
    for (int32 p = 0; p < num_gauss; p++) {
      if (loglikes(p) >= thresh) {
        pairs.push_back(std::make_pair(loglikes(p), p));
      }
    }
    // Best score first.
    std::sort(pairs.begin(), pairs.end(),
              std::greater<std::pair<BaseFloat, int32> >());

    std::vector<int32> &this_output = (*output)[i];
    for (int32 j = 0;
         j < num_gselect && j < static_cast<int32>(pairs.size());
         j++) {
      this_output.push_back(pairs[j].second);
      tot_loglike = LogAdd(tot_loglike, pairs[j].first);
    }
    KALDI_ASSERT(!this_output.empty());
    ans += tot_loglike;
  }
  return ans;
}
BaseFloat DiagGmm::GaussianSelectionPreselect(
const VectorBase<BaseFloat> &data,
......
......@@ -81,6 +81,13 @@ class DiagGmm {
void LogLikelihoods(const VectorBase<BaseFloat> &data,
Vector<BaseFloat> *loglikes) const;
/// This version of the LogLikelihoods function operates on
/// a sequence of frames simultaneously; the row index of both "data" and
/// "loglikes" is the frame index.
void LogLikelihoods(const MatrixBase<BaseFloat> &data,
Matrix<BaseFloat> *loglikes) const;
/// Outputs the per-component log-likelihoods of a subset of mixture
/// components. Note: at output, loglikes->Dim() will equal indices.size().
/// loglikes[i] will correspond to the log-likelihood of the Gaussian
......@@ -89,13 +96,20 @@ class DiagGmm {
const std::vector<int32> &indices,
Vector<BaseFloat> *loglikes) const;
/// Get gaussian selection information for one frame. Returns log-like for
/// Get gaussian selection information for one frame. Returns log-like for
/// this frame. Output is the best "num_gselect" indices, sorted from best to
/// worst likelihood. If "num_gselect" > NumGauss(), sets it to NumGauss().
BaseFloat GaussianSelection(const VectorBase<BaseFloat> &data,
int32 num_gselect,
std::vector<int32> *output) const;
/// This version of the Gaussian selection function works for a sequence
/// of frames rather than just a single frame. Returns sum of the log-likes
/// over all frames.
BaseFloat GaussianSelection(const MatrixBase<BaseFloat> &data,
int32 num_gselect,
std::vector<std::vector<int32> > *output) const;
/// Get gaussian selection information for one frame. Returns log-like for
/// this frame. Output is the best "num_gselect" indices that were
/// preselected, sorted from best to worst likelihood. If "num_gselect" >
......
......@@ -105,9 +105,8 @@ int main(int argc, char *argv[]) {
gmm.GaussianSelectionPreselect(mat.Row(i), preselect[i],
num_gselect, &(gselect[i]));
} else { // No "preselect" [i.e. no existing gselect]: simple case.
for (int32 i = 0; i < mat.NumRows(); i++)
tot_like_this_file +=
gmm.GaussianSelection(mat.Row(i), num_gselect, &(gselect[i]));
tot_like_this_file =
gmm.GaussianSelection(mat, num_gselect, &gselect);
}
gselect_writer.Write(utt, gselect);
......
......@@ -128,8 +128,8 @@ sph2pipe_v2.5: sph2pipe_v2.5.tar.gz
tar xzf sph2pipe_v2.5.tar.gz
sph2pipe_v2.5.tar.gz:
wget -T 10 -t 3 http://merlin.fit.vutbr.cz/kaldi/sph2pipe_v2.5.tar.gz || \
wget --no-check-certificate -T 10 https://sourceforge.net/projects/kaldi/files/sph2pipe_v2.5.tar.gz
wget --no-check-certificate -T 10 https://sourceforge.net/projects/kaldi/files/sph2pipe_v2.5.tar.gz || \
wget -T 10 -t 3 http://www.danielpovey.com/files/kaldi/sph2pipe_v2.5.tar.gz
openblas: openblas_compiled
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment