Commit 55b17595 authored by Dan Povey

sandbox/dan2: Merging changes from trunk; change nnet2 code to use compressed examples.

git-svn-id: https://svn.code.sf.net/p/kaldi/code/sandbox/dan2@3100 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
parent c1f74e52
......@@ -201,7 +201,7 @@ for f in `cat $ffv_flist | cut -d, -f2`; do
fi
done
cat $ffv_flist | cut -d, -f2 | \
perl -ane 'm:/([^/]+)\.ffv: || die "Bad line $_"; $key=$1; s/\.ffv$/\.mat/; print "$key $_";' > $scpfile
perl -ane 'm:/([^/]+)\.ffv$: || die "Bad line $_"; $key=$1; s/\.ffv$/\.mat/; print "$key $_";' > $scpfile
EOF
chmod +x $expdir/convert.sh
......
......@@ -171,7 +171,7 @@ for f in `cat $sacc_flist | cut -d, -f2`; do
fi
done
cat $sacc_flist | cut -d, -f2 | \
perl -ane 'm:/([^/]+)\.pitch: || die "Bad line $_"; $key=$1; s/\.pitch$/\.mat/; print "$key $_";' > $scpfile
perl -ane 'm:/([^/]+)\.pitch$: || die "Bad line $_"; $key=$1; s/\.pitch$/\.mat/; print "$key $_";' > $scpfile
EOF
chmod +x $expdir/convert.sh
......
### for n in `cat decode_dir`; do sh local/cal_runtime.sh $n; done
## wide beam decoding real-time factor, i.e. decoding time divided by audio duration (source code version before 12th Oct 2013)
## Intel(R) Xeon(R) CPU X5690 @ 3.47GHz, > 64G RAM
# 1.8M size L.fst, 76M size HCLG.fst
exp/nnet_4m_3l/decode_wide_eval_closelm_xeon3.5/log 0.50984
exp/nnet_8m_6l/decode_wide_eval_closelm_xeon3.5/log 0.604995
exp/nnet_tanh_3l/decode_wide_eval_closelm_xeon3.5/log 0.584603
exp/nnet_tanh_6l/decode_wide_eval_closelm_xeon3.5/log 0.606776
exp/sgmm_5a/decode_wide_eval_closelm_xeon3.5/log 0.821786
exp/tri5a/decode_wide_eval_closelm_xeon3.5/log 0.988112
exp/tri5a_fmmi_b0.1/decode_wide_eval_closelm_iter8_xeon3.5/log 1.09135
exp/tri5a_mmi_b0.1/decode_wide_eval_closelm_xeon3.5/log 1.07229
exp/tri5a_pretrain-dbn_dnn_smbr/decode_closelm_it2_dnnwide_xeon3.5/log 0.714925
#
# 28M size L.fst, 76M size HCLG.fst
exp/nnet_4m_3l/decode_wide_eval_xeon3.5/log 0.924118
exp/nnet_8m_6l/decode_wide_eval_xeon3.5/log 1.00894
exp/nnet_tanh_3l/decode_wide_eval_xeon3.5/log 1.00994
exp/nnet_tanh_6l/decode_wide_eval_xeon3.5/log 0.940629
exp/sgmm_5a/decode_wide_eval_xeon3.5/log 1.14046
exp/tri5a/decode_wide_eval_xeon3.5/log 1.34804
exp/tri5a_fmmi_b0.1/decode_wide_eval_iter8_xeon3.5/log 1.87827
exp/tri5a_mmi_b0.1/decode_wide_eval_xeon3.5/log 1.77104
exp/tri5a_pretrain-dbn_dnn_smbr/decode_it2_dnnwide_xeon3.5/log 1.0829
## Intel(R) Xeon(R) CPU L5420 @ 2.50GHz, 6G RAM
# 1.8M size L.fst, 76M size HCLG.fst
exp/nnet_4m_3l/decode_wide_eval_closelm/log 1.18866
exp/nnet_8m_6l/decode_wide_eval_closelm_rerun/log 1.35657
exp/nnet_tanh_3l/decode_wide_eval_closelm/log 1.34746
exp/nnet_tanh_6l/decode_wide_eval_closelm/log 1.39452
exp/sgmm_5a/decode_wide_eval_closelm/log 1.71265
exp/tri5a/decode_wide_eval_closelm/log 2.29062
exp/tri5a_fmmi_b0.1/decode_wide_eval_closelm_iter8/log 2.66123
exp/tri5a_mmi_b0.1/decode_wide_eval_closelm/log 2.61595
exp/tri5a_pretrain-dbn_dnn_smbr/decode_closelm_it2_dnnwide/log 2.18625
#
# 28M size L.fst, 76M size HCLG.fst
exp/nnet_4m_3l/decode_wide_eval/log 2.01065
exp/nnet_8m_6l/decode_wide_eval_rerun/log 2.16128
exp/nnet_tanh_3l/decode_wide_eval/log 2.16153
exp/nnet_tanh_6l/decode_wide_eval/log 2.09993
exp/sgmm_5a/decode_wide_eval/log 2.28183
exp/tri5a/decode_wide_eval/log 3.09584
exp/tri5a_fmmi_b0.1/decode_wide_eval_iter8/log 4.32505
exp/tri5a_mmi_b0.1/decode_wide_eval/log 4.14296
exp/tri5a_pretrain-dbn_dnn_smbr/decode_it2_dnnwide/log 3.08541
#!/bin/bash
# Apache 2.0. Copyright 2013, Hong Kong University of Science and Technology (author: Ricky Chan Ho Yin)
# This script calculates the average decoding real-time factor for a decoding directory, using the runtime information in the decode logs.
if [ $# -ne 1 ] && [ $# -ne 2 ]; then
echo "Usage: $0 decode_directory [framePerSecond]"
echo ""
echo "## The default framerate framePerSecond = 100 i.e. 10ms sliding for input features during decoding"
exit
fi
decodeDIR=$1
if [ ! -d $decodeDIR/log ]; then
echo "decoding directory $decodeDIR/log not exist"
exit
fi
if [ $# -eq 2 ]; then
framePerSecond=$2
else
framePerSecond=100.0
fi
printf "$decodeDIR/log\t"
tail $decodeDIR/log/decode*.log | egrep -e 'Time taken .* real-time|Overall log-likelihood per frame' | awk -v fps=$framePerSecond 'BEGIN{sumTime=0; sumFrame=0;} {if($0 ~ / Time taken /) {pos=match($0, " [0-9.]+s:"); pos2=match($0, "s: real-time factor"); sumTime+=substr($0, pos+1, pos2-pos-1); } else {sumFrame+=$(NF-1)}; }; END{print sumTime/(sumFrame/fps)}'
......@@ -115,10 +115,10 @@ local/ext/score.sh data/eval exp/tri5a/graph_closelm exp/sgmm_5a_mmi_b0.1/decode
local/ext/score.sh data/eval exp/tri5a/graph_closelm exp/sgmm_5a_mmi_b0.1/decode_wide_eval_closelm_2
local/ext/score.sh data/eval exp/tri5a/graph_closelm exp/sgmm_5a_mmi_b0.1/decode_wide_eval_closelm_3
local/ext/score.sh data/eval exp/tri5a/graph_closelm exp/sgmm_5a_mmi_b0.1/decode_wide_eval_closelm_4 # sgmm+bMMI
local/ext/score.sh data/eval exp/tri5a/graph_closelm exp/nnet_8m_6l/decode_nnwide_eval_closelm # nnet 6 layers (983 neurons)
local/ext/score.sh data/eval exp/tri5a/graph_closelm exp/nnet_tanh_6l/decode_wide_eval_closelm # nnet2 6 layers (1024 neurons)
local/ext/score.sh data/eval exp/tri5a/graph_closelm exp/nnet_4m_3l/decode_wide_eval_closelm # nnet 4 layers (823 neurons)
local/ext/score.sh data/eval exp/tri5a/graph_closelm exp/tri5a_pretrain-dbn_dnn/decode_closelm_dnnwide # pretrained 6 layers RBM DNN
local/ext/score.sh data/eval exp/tri5a/graph_closelm exp/nnet_8m_6l/decode_nnwide_eval_closelm # nnet 6 hidden layers (983 neurons)
local/ext/score.sh data/eval exp/tri5a/graph_closelm exp/nnet_tanh_6l/decode_wide_eval_closelm # nnet2 6 hidden layers (1024 neurons)
local/ext/score.sh data/eval exp/tri5a/graph_closelm exp/nnet_4m_3l/decode_wide_eval_closelm # nnet 3 hidden layers (823 neurons)
local/ext/score.sh data/eval exp/tri5a/graph_closelm exp/tri5a_pretrain-dbn_dnn/decode_closelm_dnnwide # pretrained 6 hidden layers RBM DNN
local/ext/score.sh data/eval exp/tri5a/graph_closelm exp/tri5a_pretrain-dbn_dnn_smbr/decode_closelm_it1_dnnwide
local/ext/score.sh data/eval exp/tri5a/graph_closelm exp/tri5a_pretrain-dbn_dnn_smbr/decode_closelm_it2_dnnwide # state level minimum bayes risk DNN
......
#!/bin/bash
# This runs on the 100 hour subset. This version of the recipe runs on GPUs.
# We assume you have 8 GPU machines. You have to use --num-threads 1 so it will
# use the version of the code that can use GPUs.
# We assume the queue is set up as in JHU (or as in the "Kluster" project
# on Sourceforge) where "gpu" is a consumable resource that you can set to
# the number of GPU cards a machine has.
. cmd.sh
(
if [ ! -f exp/nnet5b/final.mdl ]; then
steps/nnet2/train_tanh.sh --cmd "$decode_cmd -l gpu=1" --parallel-opts "" --stage 0 \
--num-threads 1 \
--mix-up 8000 \
--initial-learning-rate 0.01 --final-learning-rate 0.001 \
--num-jobs-nnet 8 --num-hidden-layers 4 \
--hidden-layer-dim 1024 \
data/train_100k_nodup data/lang exp/tri4a exp/nnet5b || exit 1;
fi
for lm_suffix in tg fsh_tgpr; do
steps/decode_nnet_cpu.sh --cmd "$decode_cmd" --nj 30 \
--config conf/decode.config --transform-dir exp/tri4a/decode_eval2000_sw1_${lm_suffix} \
exp/tri4a/graph_sw1_${lm_suffix} data/eval2000 exp/nnet5b/decode_eval2000_sw1_${lm_suffix} &
done
)
......@@ -141,6 +141,6 @@ preprocessor variables, setting compile options, linking with libraries, and so
\section build_setup_platforms Which platforms has Kaldi been compiled on?
We have compiled Kaldi on Windows, Cygwin, various flavors of Linux (including
Ubuntu, CentOS and SUSE), and Darwin.
Ubuntu, CentOS, Debian and SUSE), and Darwin.
*/
......@@ -842,11 +842,11 @@ template<class Weight, class IntType> class LatticeDeterminizerPruned {
// ProcessTransition was called from "ProcessTransitions" in the non-pruned
// code, but now we in effect put the calls to ProcessTransition on a priority
// queue, and it now gets called directly from Determinize(). This function
// processes a transition from state "state". The set "subset" of Elements
// processes a transition from state "ostate_id". The set "subset" of Elements
// represents a set of next-states with associated weights and strings, each
// one arising from an arc from some state in a determinized-state; the
// next-states are unique (there is only one Entry associated with each)
void ProcessTransition(OutputStateId ostate_id, Label ilabel, vector<Element> *subset) {
void ProcessTransition(OutputStateId ostate_id, Label ilabel, vector<Element> *subset) {
Weight forward_weight = output_states_[ostate_id]->forward_weight;
StringId common_str;
......
......@@ -223,7 +223,7 @@ BaseFloat LatticeForwardBackward(const Lattice &lat, Posterior *arc_post,
double this_alpha = alpha[s];
for (ArcIterator<Lattice> aiter(lat, s); !aiter.Done(); aiter.Next()) {
const Arc &arc = aiter.Value();
double arc_like = -(arc.weight.Value1() + arc.weight.Value2());
double arc_like = -ConvertToCost(arc.weight);
alpha[arc.nextstate] = LogAdd(alpha[arc.nextstate], this_alpha + arc_like);
}
Weight f = lat.Final(s);
......@@ -239,23 +239,32 @@ BaseFloat LatticeForwardBackward(const Lattice &lat, Posterior *arc_post,
double this_beta = -(f.Value1() + f.Value2());
for (ArcIterator<Lattice> aiter(lat, s); !aiter.Done(); aiter.Next()) {
const Arc &arc = aiter.Value();
double arc_like = -(arc.weight.Value1() + arc.weight.Value2()),
double arc_like = -ConvertToCost(arc.weight),
arc_beta = beta[arc.nextstate] + arc_like;
this_beta = LogAdd(this_beta, arc_beta);
int32 transition_id = arc.ilabel;
if (transition_id != 0) { // Arc has a transition-id on it [not epsilon]
// The following "if" is an optimization to avoid un-needed exp().
if (transition_id != 0 || acoustic_like_sum != NULL) {
double posterior = exp(alpha[s] + arc_beta - tot_forward_prob);
(*arc_post)[state_times[s]].push_back(std::make_pair(transition_id,
posterior));
if (acoustic_like_sum)
if (transition_id != 0) // Arc has a transition-id on it [not epsilon]
(*arc_post)[state_times[s]].push_back(std::make_pair(transition_id,
posterior));
if (acoustic_like_sum != NULL)
*acoustic_like_sum -= posterior * arc.weight.Value2();
}
}
if (acoustic_like_sum != NULL && f != Weight::Zero()) {
double final_logprob = - ConvertToCost(f),
posterior = exp(alpha[s] + final_logprob - tot_forward_prob);
*acoustic_like_sum -= posterior * f.Value2();
}
beta[s] = this_beta;
}
double tot_backward_prob = beta[0];
if (!ApproxEqual(tot_forward_prob, tot_backward_prob, 1e-8)) {
KALDI_ERR << "Total forward probability over lattice = " << tot_forward_prob
KALDI_WARN << "Total forward probability over lattice = " << tot_forward_prob
<< ", while total backward probability = " << tot_backward_prob;
}
// Now combine any posteriors with the same transition-id.
......@@ -308,6 +317,211 @@ void ConvertLatticeToPhones(const TransitionModel &trans,
} // end looping over states
}
static inline double LogAddOrMax(bool viterbi, double a, double b) {
if (viterbi)
return std::max(a, b);
else
return LogAdd(a, b);
}
// Computes (normal or Viterbi) alphas and betas; returns the total log-prob
// (or, in the Viterbi case, the negated best-path cost). Note: in either case
// the alphas and betas are negated costs. Requires that lat be topologically
// sorted. This code will work for either CompactLattice or Lattice.
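// In the non-Viterbi case the recursions computed below are, writing
// cost(arc) = graph cost + acoustic cost,
//   alpha(t) = log-sum over arcs (s -> t) of [ alpha(s) - cost(arc) ],
//   beta(s)  = log-sum over arcs (s -> t) of [ beta(t) - cost(arc) ],
// with beta(s) initialized to -cost(Final(s)); the returned value is the total
// log-probability of the lattice.  With viterbi == true the log-sums become
// maxima and the returned value is the negated best-path cost.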
template<typename LatticeType>
static double ComputeLatticeAlphasAndBetas(const LatticeType &lat,
bool viterbi,
vector<double> *alpha,
vector<double> *beta) {
typedef typename LatticeType::Arc Arc;
typedef typename Arc::Weight Weight;
typedef typename Arc::StateId StateId;
StateId num_states = lat.NumStates();
KALDI_ASSERT(lat.Properties(fst::kTopSorted, true) == fst::kTopSorted);
KALDI_ASSERT(lat.Start() == 0);
alpha->resize(num_states, kLogZeroDouble);
beta->resize(num_states, kLogZeroDouble);
double tot_forward_prob = kLogZeroDouble;
(*alpha)[0] = 0.0;
// Propagate alphas forward.
for (StateId s = 0; s < num_states; s++) {
double this_alpha = (*alpha)[s];
for (fst::ArcIterator<LatticeType> aiter(lat, s); !aiter.Done();
aiter.Next()) {
const Arc &arc = aiter.Value();
double arc_like = -ConvertToCost(arc.weight);
(*alpha)[arc.nextstate] = LogAddOrMax(viterbi, (*alpha)[arc.nextstate],
this_alpha + arc_like);
}
Weight f = lat.Final(s);
if (f != Weight::Zero()) {
double final_like = this_alpha - ConvertToCost(f);
tot_forward_prob = LogAddOrMax(viterbi, tot_forward_prob, final_like);
}
}
for (StateId s = num_states-1; s >= 0; s--) { // it's guaranteed signed.
double this_beta = -ConvertToCost(lat.Final(s));
for (fst::ArcIterator<LatticeType> aiter(lat, s); !aiter.Done();
aiter.Next()) {
const Arc &arc = aiter.Value();
double arc_like = -ConvertToCost(arc.weight),
arc_beta = (*beta)[arc.nextstate] + arc_like;
this_beta = LogAddOrMax(viterbi, this_beta, arc_beta);
}
(*beta)[s] = this_beta;
}
double tot_backward_prob = (*beta)[lat.Start()];
if (!ApproxEqual(tot_forward_prob, tot_backward_prob, 1e-8)) {
KALDI_WARN << "Total forward probability over lattice = " << tot_forward_prob
<< ", while total backward probability = " << tot_backward_prob;
}
// Split the difference when returning... they should be the same.
return 0.5 * (tot_backward_prob + tot_forward_prob);
}
/// This is used in CompactLatticeLimitDepth.
struct LatticeArcRecord {
BaseFloat logprob; // Best Viterbi logprob of any path through this arc,
// minus the overall best-path logprob of the lattice (so it is <= 0).
CompactLatticeArc::StateId state; // state in the lattice.
size_t arc; // arc index within the state.
bool operator < (const LatticeArcRecord &other) const {
return logprob < other.logprob;
}
};
void CompactLatticeLimitDepth(int32 max_depth_per_frame,
CompactLattice *clat) {
typedef CompactLatticeArc Arc;
typedef Arc::Weight Weight;
typedef Arc::StateId StateId;
if (clat->Start() == fst::kNoStateId) {
KALDI_WARN << "Limiting depth of empty lattice.";
return;
}
if (clat->Properties(fst::kTopSorted, true) == 0) {
if (!TopSort(clat))
KALDI_ERR << "Topological sorting of lattice failed.";
}
vector<int32> state_times;
int32 T = CompactLatticeStateTimes(*clat, &state_times);
// The alpha and beta quantities here are "viterbi" alphas and beta.
std::vector<double> alpha;
std::vector<double> beta;
bool viterbi = true;
double best_prob = ComputeLatticeAlphasAndBetas(*clat, viterbi,
&alpha, &beta);
std::vector<std::vector<LatticeArcRecord> > arc_records(T);
StateId num_states = clat->NumStates();
for (StateId s = 0; s < num_states; s++) {
for (fst::ArcIterator<CompactLattice> aiter(*clat, s); !aiter.Done();
aiter.Next()) {
const Arc &arc = aiter.Value();
LatticeArcRecord arc_record;
arc_record.state = s;
arc_record.arc = aiter.Position();
arc_record.logprob =
(alpha[s] + beta[arc.nextstate] - ConvertToCost(arc.weight))
- best_prob;
KALDI_ASSERT(arc_record.logprob < 0.1); // Should be zero or negative.
int32 num_frames = arc.weight.String().size(), start_t = state_times[s];
for (int32 t = start_t; t < start_t + num_frames; t++) {
KALDI_ASSERT(t < T);
arc_records[t].push_back(arc_record);
}
}
}
StateId dead_state = clat->AddState(); // A non-coaccessible state which we use
// to remove arcs (make them end
// there).
size_t max_depth = max_depth_per_frame;
for (int32 t = 0; t < T; t++) {
size_t size = arc_records[t].size();
if (size > max_depth) {
// we sort from worst to best, so we keep the later-numbered ones,
// and delete the lower-numbered ones.
size_t cutoff = size - max_depth;
std::nth_element(arc_records[t].begin(),
arc_records[t].begin() + cutoff,
arc_records[t].end());
for (size_t index = 0; index < cutoff; index++) {
LatticeArcRecord record(arc_records[t][index]);
fst::MutableArcIterator<CompactLattice> aiter(clat, record.state);
aiter.Seek(record.arc);
Arc arc = aiter.Value();
if (arc.nextstate != dead_state) { // not already killed.
arc.nextstate = dead_state;
aiter.SetValue(arc);
}
}
}
}
Connect(clat);
TopSortCompactLatticeIfNeeded(clat);
}
void TopSortCompactLatticeIfNeeded(CompactLattice *clat) {
if (clat->Properties(fst::kTopSorted, true) == 0) {
if (fst::TopSort(clat) == false) {
KALDI_ERR << "Topological sorting failed";
}
}
}
void TopSortLatticeIfNeeded(Lattice *lat) {
if (lat->Properties(fst::kTopSorted, true) == 0) {
if (fst::TopSort(lat) == false) {
KALDI_ERR << "Topological sorting failed";
}
}
}
/// Returns the depth of the lattice, defined as the average number of
/// arcs crossing any given frame. Returns 1 for empty lattices.
/// Requires that input is topologically sorted.
BaseFloat CompactLatticeDepth(const CompactLattice &clat,
int32 *num_frames) {
typedef CompactLattice::Arc::StateId StateId;
if (clat.Properties(fst::kTopSorted, true) == 0) {
KALDI_ERR << "Lattice input to CompactLatticeDepth was not topologically "
<< "sorted.";
}
if (clat.Start() == fst::kNoStateId) {
if (num_frames != NULL)
  *num_frames = 0;
return 1.0;
}
size_t num_arc_frames = 0;
int32 t;
{
vector<int32> state_times;
t = CompactLatticeStateTimes(clat, &state_times);
}
if (num_frames != NULL)
*num_frames = t;
for (StateId s = 0; s < clat.NumStates(); s++) {
for (fst::ArcIterator<CompactLattice> aiter(clat, s); !aiter.Done();
aiter.Next()) {
const CompactLatticeArc &arc = aiter.Value();
num_arc_frames += arc.weight.String().size();
}
num_arc_frames += clat.Final(s).String().size();
}
return num_arc_frames / static_cast<BaseFloat>(t);
}
void ConvertCompactLatticeToPhones(const TransitionModel &trans,
CompactLattice *clat) {
typedef CompactLatticeArc Arc;
......@@ -350,13 +564,7 @@ bool LatticeBoost(const TransitionModel &trans,
BaseFloat max_silence_error,
Lattice *lat) {
kaldi::uint64 props = lat->Properties(fst::kFstProperties, false);
if (!(props & fst::kTopSorted)) {
if (fst::TopSort(lat) == false) {
KALDI_WARN << "Cycles detected in lattice";
return false;
}
}
TopSortLatticeIfNeeded(lat);
KALDI_ASSERT(IsSortedAndUniq(silence_phones));
KALDI_ASSERT(max_silence_error >= 0.0 && max_silence_error <= 1.0);
......@@ -467,7 +675,7 @@ BaseFloat LatticeForwardBackwardMpe(const Lattice &lat,
double this_alpha = alpha[s];
for (ArcIterator<Lattice> aiter(lat, s); !aiter.Done(); aiter.Next()) {
const Arc &arc = aiter.Value();
double arc_like = -(arc.weight.Value1() + arc.weight.Value2());
double arc_like = -ConvertToCost(arc.weight);
alpha[arc.nextstate] = LogAdd(alpha[arc.nextstate], this_alpha + arc_like);
}
Weight f = lat.Final(s);
......@@ -484,7 +692,7 @@ BaseFloat LatticeForwardBackwardMpe(const Lattice &lat,
double this_beta = -(f.Value1() + f.Value2());
for (ArcIterator<Lattice> aiter(lat, s); !aiter.Done(); aiter.Next()) {
const Arc &arc = aiter.Value();
double arc_like = -(arc.weight.Value1() + arc.weight.Value2()),
double arc_like = -ConvertToCost(arc.weight),
arc_beta = beta[arc.nextstate] + arc_like;
this_beta = LogAdd(this_beta, arc_beta);
}
......@@ -503,7 +711,7 @@ BaseFloat LatticeForwardBackwardMpe(const Lattice &lat,
double this_alpha = alpha[s];
for (ArcIterator<Lattice> aiter(lat, s); !aiter.Done(); aiter.Next()) {
const Arc &arc = aiter.Value();
double arc_like = -(arc.weight.Value1() + arc.weight.Value2());
double arc_like = -ConvertToCost(arc.weight);
double frame_acc = 0.0;
if (arc.ilabel != 0) {
int32 cur_time = state_times[s];
......@@ -528,7 +736,7 @@ BaseFloat LatticeForwardBackwardMpe(const Lattice &lat,
for (StateId s = num_states-1; s >= 0; s--) {
for (ArcIterator<Lattice> aiter(lat, s); !aiter.Done(); aiter.Next()) {
const Arc &arc = aiter.Value();
double arc_like = -(arc.weight.Value1() + arc.weight.Value2()),
double arc_like = -ConvertToCost(arc.weight),
arc_beta = beta[arc.nextstate] + arc_like;
double frame_acc = 0.0;
int32 transition_id = arc.ilabel;
......@@ -606,7 +814,7 @@ BaseFloat LatticeForwardBackwardSmbr(const Lattice &lat,
double this_alpha = alpha[s];
for (ArcIterator<Lattice> aiter(lat, s); !aiter.Done(); aiter.Next()) {
const Arc &arc = aiter.Value();
double arc_like = -(arc.weight.Value1() + arc.weight.Value2());
double arc_like = -ConvertToCost(arc.weight);
alpha[arc.nextstate] = LogAdd(alpha[arc.nextstate], this_alpha + arc_like);
}
Weight f = lat.Final(s);
......@@ -623,7 +831,7 @@ BaseFloat LatticeForwardBackwardSmbr(const Lattice &lat,
double this_beta = -(f.Value1() + f.Value2());
for (ArcIterator<Lattice> aiter(lat, s); !aiter.Done(); aiter.Next()) {
const Arc &arc = aiter.Value();
double arc_like = -(arc.weight.Value1() + arc.weight.Value2()),
double arc_like = -ConvertToCost(arc.weight),
arc_beta = beta[arc.nextstate] + arc_like;
this_beta = LogAdd(this_beta, arc_beta);
}
......@@ -642,7 +850,7 @@ BaseFloat LatticeForwardBackwardSmbr(const Lattice &lat,
double this_alpha = alpha[s];
for (ArcIterator<Lattice> aiter(lat, s); !aiter.Done(); aiter.Next()) {
const Arc &arc = aiter.Value();
double arc_like = -(arc.weight.Value1() + arc.weight.Value2());
double arc_like = -ConvertToCost(arc.weight);
double frame_acc = 0.0;
if (arc.ilabel != 0) {
int32 cur_time = state_times[s];
......@@ -668,7 +876,7 @@ BaseFloat LatticeForwardBackwardSmbr(const Lattice &lat,
for (StateId s = num_states-1; s >= 0; s--) {
for (ArcIterator<Lattice> aiter(lat, s); !aiter.Done(); aiter.Next()) {
const Arc &arc = aiter.Value();
double arc_like = -(arc.weight.Value1() + arc.weight.Value2()),
double arc_like = -ConvertToCost(arc.weight),
arc_beta = beta[arc.nextstate] + arc_like;
double frame_acc = 0.0;
int32 transition_id = arc.ilabel;
......
......@@ -51,7 +51,8 @@ int32 CompactLatticeStateTimes(const CompactLattice &lat,
/// This function does the forward-backward over lattices and computes the
/// posterior probabilities of the arcs. It returns the total log-probability
/// of the lattice.
/// of the lattice. The Posterior quantities contain pairs of (transition-id, weight)
/// on each frame.
/// If the pointer "acoustic_like_sum" is provided, this value is set to
/// the sum over the arcs, of the posterior of the arc times the
/// acoustic likelihood [i.e. negated acoustic score] on that link.
......@@ -61,6 +62,29 @@ BaseFloat LatticeForwardBackward(const Lattice &lat,
Posterior *arc_post,
double *acoustic_like_sum = NULL);
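// A minimal usage sketch (illustration only, not part of this header). It
// assumes "lat" is a Lattice with acoustic and graph scores on its arcs that
// has already been topologically sorted:
//
//   Posterior post;
//   double acoustic_like_sum = 0.0;
//   BaseFloat tot_log_prob = LatticeForwardBackward(lat, &post,
//                                                   &acoustic_like_sum);
//   for (size_t t = 0; t < post.size(); t++)        // one entry per frame
//     for (size_t i = 0; i < post[t].size(); i++)   // (transition-id, posterior)
//       KALDI_VLOG(2) << "t=" << t << " tid=" << post[t][i].first
//                     << " post=" << post[t][i].second;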
/// Topologically sort the compact lattice if not already topologically sorted.
/// Will crash if the lattice cannot be topologically sorted.
void TopSortCompactLatticeIfNeeded(CompactLattice *clat);
/// Topologically sort the lattice if not already topologically sorted.
/// Will crash if the lattice cannot be topologically sorted.
void TopSortLatticeIfNeeded(Lattice *lat);
/// Returns the depth of the lattice, defined as the average number of arcs (or
/// final-prob strings) crossing any given frame. Returns 1 for empty lattices.
/// Requires that clat is topologically sorted!
BaseFloat CompactLatticeDepth(const CompactLattice &clat,
int32 *num_frames = NULL);
/// This function limits the depth of the lattice, per frame: that means, it
/// does not allow more than a specified number of arcs active on any given
/// frame. This can be used to reduce the size of the "very deep" portions of
/// the lattice.
void CompactLatticeLimitDepth(int32 max_arcs_per_frame,
CompactLattice *clat);
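// A minimal usage sketch (illustration only, not part of this header): report
// the depth of a CompactLattice "clat" obtained elsewhere (e.g. from a lattice
// reader), then cap it at, say, 50 arcs per frame:
//
//   TopSortCompactLatticeIfNeeded(&clat);  // CompactLatticeDepth needs sorted input.
//   int32 num_frames;
//   BaseFloat depth = CompactLatticeDepth(clat, &num_frames);
//   KALDI_LOG << "Depth " << depth << " over " << num_frames << " frames.";
//   CompactLatticeLimitDepth(50, &clat);   // removes the worst arcs on over-deep frames.
//   KALDI_LOG << "Depth is now " << CompactLatticeDepth(clat);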
/// Given a lattice, and a transition model to map pdf-ids to phones,
/// outputs for each frame the set of phones active on that frame. If
/// sil_phones (which must be sorted and uniq) is nonempty, it excludes
......
......@@ -64,6 +64,7 @@ void TestMinimizeCompactLattice() {
int main() {
using namespace kaldi;
using kaldi::int32;
SetVerboseLevel(4);
for (int32 i = 0; i < 1000; i++) {
TestMinimizeCompactLattice();
......
......@@ -111,6 +111,7 @@ void TestPushCompactLatticeWeights() {
int main() {
using namespace kaldi;
using kaldi::int32;
for (int32 i = 0; i < 15; i++) {
TestPushCompactLatticeStrings();
TestPushCompactLatticeWeights();
......
......@@ -17,7 +17,7 @@ BINFILES = lattice-best-path lattice-prune lattice-equivalent lattice-to-nbest \
lattice-rescore-mapped lattice-depth lattice-align-phones \
lattice-to-smbr-post lattice-determinize-pruned-parallel \
lattice-add-penalty lattice-align-words-lexicon lattice-push \
lattice-minimize
lattice-minimize lattice-limit-depth
OBJFILES =
......
// latbin/lattice-copy.cc
// Copyright 2009-2011 Microsoft Corporation
// 2013 Johns Hopkins University (author: Daniel Povey)
// See ../../COPYING for clarification regarding multiple authors
//
......