Commit 428af53b authored by Jan Trmal's avatar Jan Trmal
Browse files

(trunk) Adding OpenFstWin specific patch -- includes all from...

(trunk) Adding OpenFstWin specific patch -- includes all from openfst-1.3.4.patch plus OpenFstWin specific things to make Kaldi compile under MSVC

git-svn-id: https://svn.code.sf.net/p/kaldi/code/trunk@5040 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
parent 66e162b2
diff --git a/src/include/fst/fst.h b/src/include/fst/fst.h
index 5ad3b52..d9c0ca6 100644
--- a/src/include/fst/fst.h
+++ b/src/include/fst/fst.h
@@ -45,6 +45,12 @@ DECLARE_bool(fst_align);
namespace fst {
+ typedef ::int64 int64;
+ typedef ::uint64 uint64;
+ typedef ::int32 int32;
+ typedef ::uint32 uint32;
+
+
bool OPENFSTDLL IsFstHeader(istream &, const string &); //ChangedPD
class FstHeader;
diff --git a/src/include/fst/interval-set.h b/src/include/fst/interval-set.h
index c4362f2..58cad44 100644
--- a/src/include/fst/interval-set.h
+++ b/src/include/fst/interval-set.h
@@ -37,38 +37,38 @@ template <typename T>
class IntervalSet {
public:
struct Interval {
- T begin;
- T end;
+ T begin_;
+ T end_;
- Interval() : begin(-1), end(-1) {}
+ Interval() : begin_(-1), end_(-1) {}
- Interval(T b, T e) : begin(b), end(e) {}
+ Interval(T b, T e) : begin_(b), end_(e) {}
bool operator<(const Interval &i) const {
- return begin < i.begin || (begin == i.begin && end > i.end);
+ return begin_ < i.begin_ || (begin_ == i.begin_ && end_ > i.end_);
}
bool operator==(const Interval &i) const {
- return begin == i.begin && end == i.end;
+ return begin_ == i.begin_ && end_ == i.end_;
}
bool operator!=(const Interval &i) const {
- return begin != i.begin || end != i.end;
+ return begin_ != i.begin_ || end_ != i.end_;
}
istream &Read(istream &strm) {
T n;
ReadType(strm, &n);
- begin = n;
+ begin_ = n;
ReadType(strm, &n);
- end = n;
+ end_ = n;
return strm;
}
ostream &Write(ostream &strm) const {
- T n = begin;
+ T n = begin_;
WriteType(strm, n);
- n = end;
+ n = end_;
WriteType(strm, n);
return strm;
}
@@ -108,7 +108,7 @@ class IntervalSet {
lower_bound(intervals_.begin(), intervals_.end(), interval);
if (lb == intervals_.begin())
return false;
- return (--lb)->end > value;
+ return (--lb)->end_ > value;
}
// Requires intervals be normalized.
@@ -123,7 +123,7 @@ class IntervalSet {
bool Singleton() const {
return intervals_.size() == 1 &&
- intervals_[0].begin + 1 == intervals_[0].end;
+ intervals_[0].begin_ + 1 == intervals_[0].end_;
}
@@ -178,17 +178,17 @@ void IntervalSet<T>::Normalize() {
T size = 0;
for (T i = 0; i < intervals_.size(); ++i) {
Interval &inti = intervals_[i];
- if (inti.begin == inti.end)
+ if (inti.begin_ == inti.end_)
continue;
for (T j = i + 1; j < intervals_.size(); ++j) {
Interval &intj = intervals_[j];
- if (intj.begin > inti.end)
+ if (intj.begin_ > inti.end_)
break;
- if (intj.end > inti.end)
- inti.end = intj.end;
+ if (intj.end_ > inti.end_)
+ inti.end_ = intj.end_;
++i;
}
- count_ += inti.end - inti.begin;
+ count_ += inti.end_ - inti.begin_;
intervals_[size++] = inti;
}
intervals_.resize(size);
@@ -208,17 +208,17 @@ void IntervalSet<T>::Intersect(const IntervalSet<T> &iset,
oset->count_ = 0;
while (it1 != intervals_.end() && it2 != iintervals->end()) {
- if (it1->end <= it2->begin) {
+ if (it1->end_ <= it2->begin_) {
++it1;
- } else if (it2->end <= it1->begin) {
+ } else if (it2->end_ <= it1->begin_) {
++it2;
} else {
Interval interval;
- interval.begin = max(it1->begin, it2->begin);
- interval.end = min(it1->end, it2->end);
+ interval.begin_ = max(it1->begin_, it2->begin_);
+ interval.end_ = min(it1->end_, it2->end_);
ointervals->push_back(interval);
- oset->count_ += interval.end - interval.begin;
- if (it1->end < it2->end)
+ oset->count_ += interval.end_ - interval.begin_;
+ if (it1->end_ < it2->end_)
++it1;
else
++it2;
@@ -235,21 +235,21 @@ void IntervalSet<T>::Complement(T maxval, IntervalSet<T> *oset) const {
oset->count_ = 0;
Interval interval;
- interval.begin = 0;
+ interval.begin_ = 0;
for (typename vector<Interval>::const_iterator it = intervals_.begin();
it != intervals_.end();
++it) {
- interval.end = min(it->begin, maxval);
- if (interval.begin < interval.end) {
+ interval.end_ = min(it->begin_, maxval);
+ if (interval.begin_ < interval.end_) {
ointervals->push_back(interval);
- oset->count_ += interval.end - interval.begin;
+ oset->count_ += interval.end_ - interval.begin_;
}
- interval.begin = it->end;
+ interval.begin_ = it->end_;
}
- interval.end = maxval;
- if (interval.begin < interval.end) {
+ interval.end_ = maxval;
+ if (interval.begin_ < interval.end_) {
ointervals->push_back(interval);
- oset->count_ += interval.end - interval.begin;
+ oset->count_ += interval.end_ - interval.begin_;
}
}
@@ -263,7 +263,7 @@ void IntervalSet<T>::Difference(const IntervalSet<T> &iset,
oset->count_ = 0;
} else {
IntervalSet<T> cset;
- iset.Complement(intervals_.back().end, &cset);
+ iset.Complement(intervals_.back().end_, &cset);
Intersect(cset, oset);
}
}
@@ -277,9 +277,9 @@ bool IntervalSet<T>::Overlaps(const IntervalSet<T> &iset) const {
typename vector<Interval>::const_iterator it2 = intervals->begin();
while (it1 != intervals_.end() && it2 != intervals->end()) {
- if (it1->end <= it2->begin) {
+ if (it1->end_ <= it2->begin_) {
++it1;
- } else if (it2->end <= it1->begin) {
+ } else if (it2->end_ <= it1->begin_) {
++it2;
} else {
return true;
@@ -300,21 +300,21 @@ bool IntervalSet<T>::StrictlyOverlaps(const IntervalSet<T> &iset) const {
bool overlap = false; // point in both intervals_ and intervals
while (it1 != intervals_.end() && it2 != intervals->end()) {
- if (it1->end <= it2->begin) { // no overlap - it1 first
+ if (it1->end_ <= it2->begin_) { // no overlap - it1 first
only1 = true;
++it1;
- } else if (it2->end <= it1->begin) { // no overlap - it2 first
+ } else if (it2->end_ <= it1->begin_) { // no overlap - it2 first
only2 = true;
++it2;
- } else if (it2->begin == it1->begin && it2->end == it1->end) { // equals
+ } else if (it2->begin_ == it1->begin_ && it2->end_ == it1->end_) { // equals
overlap = true;
++it1;
++it2;
- } else if (it2->begin <= it1->begin && it2->end >= it1->end) { // 1 c 2
+ } else if (it2->begin_ <= it1->begin_ && it2->end_ >= it1->end_) { // 1 c 2
only2 = true;
overlap = true;
++it1;
- } else if (it1->begin <= it2->begin && it1->end >= it2->end) { // 2 c 1
+ } else if (it1->begin_ <= it2->begin_ && it1->end_ >= it2->end_) { // 2 c 1
only1 = true;
overlap = true;
++it2;
@@ -346,11 +346,11 @@ bool IntervalSet<T>::Contains(const IntervalSet<T> &iset) const {
typename vector<Interval>::const_iterator it2 = intervals->begin();
while (it1 != intervals_.end() && it2 != intervals->end()) {
- if (it1->end <= it2->begin) { // no overlap - it1 first
+ if (it1->end_ <= it2->begin_) { // no overlap - it1 first
++it1;
- } else if (it2->begin < it1->begin || it2->end > it1->end) { // no C
+ } else if (it2->begin_ < it1->begin_ || it2->end_ > it1->end_) { // no C
return false;
- } else if (it2->end == it1->end) {
+ } else if (it2->end_ == it1->end_) {
++it1;
++it2;
} else {
@@ -370,7 +370,7 @@ ostream &operator<<(ostream &strm, const IntervalSet<T> &s) {
++it) {
if (it != intervals->begin())
strm << ",";
- strm << "[" << it->begin << "," << it->end << ")";
+ strm << "[" << it->begin_ << "," << it->end_ << ")";
}
strm << "}";
return strm;
diff --git a/src/include/fst/label-reachable.h b/src/include/fst/label-reachable.h
index a7c3360..491ef7d 100644
--- a/src/include/fst/label-reachable.h
+++ b/src/include/fst/label-reachable.h
@@ -359,9 +359,9 @@ class LabelReachable {
iiter = intervals->begin();
iiter != intervals->end(); ++iiter) {
begin_low = LowerBound(aiter, end_low, aiter_end,
- aiter_input, iiter->begin);
+ aiter_input, iiter->begin_);
end_low = LowerBound(aiter, begin_low, aiter_end,
- aiter_input, iiter->end);
+ aiter_input, iiter->end_);
if (end_low - begin_low > 0) {
if (reach_begin_ < 0)
reach_begin_ = begin_low;
diff --git a/src/include/fst/minimize.h b/src/include/fst/minimize.h
index 3fbe3ba..6e9dd3d 100644
--- a/src/include/fst/minimize.h
+++ b/src/include/fst/minimize.h
@@ -134,7 +134,14 @@ class CyclicMinimizer {
typedef typename A::Weight Weight;
typedef ReverseArc<A> RevA;
- CyclicMinimizer(const ExpandedFst<A>& fst) {
+ CyclicMinimizer(const ExpandedFst<A>& fst):
+ // tell the Partition data-member to expect multiple repeated
+ // calls to SplitOn with the same element if we are non-deterministic.
+ P_(fst.Properties(kIDeterministic, true) == 0) {
+ if(fst.Properties(kIDeterministic, true) == 0)
+ CHECK(Weight::Properties() & kIdempotent); // this minimization
+ // algorithm for non-deterministic FSTs can only work with idempotent
+ // semirings.
Initialize(fst);
Compute(fst);
}
@@ -315,7 +322,13 @@ class AcyclicMinimizer {
typedef typename A::StateId ClassId;
typedef typename A::Weight Weight;
- AcyclicMinimizer(const ExpandedFst<A>& fst) {
+ AcyclicMinimizer(const ExpandedFst<A>& fst):
+ // tell the Partition data-member to expect multiple repeated
+ // calls to SplitOn with the same element if we are non-deterministic.
+ partition_(fst.Properties(kIDeterministic, true) == 0) {
+ if(fst.Properties(kIDeterministic, true) == 0)
+ CHECK(Weight::Properties() & kIdempotent); // minimization for
+ // non-deterministic FSTs can only work with idempotent semirings.
Initialize(fst);
Refine(fst);
}
@@ -531,13 +544,7 @@ template <class A>
void Minimize(MutableFst<A>* fst,
MutableFst<A>* sfst = 0,
float delta = kDelta) {
- uint64 props = fst->Properties(kAcceptor | kIDeterministic|
- kWeighted | kUnweighted, true);
- if (!(props & kIDeterministic)) {
- FSTERROR() << "FST is not deterministic";
- fst->SetProperties(kError, kError);
- return;
- }
+ uint64 props = fst->Properties(kAcceptor | kWeighted | kUnweighted, true);
if (!(props & kAcceptor)) { // weighted transducer
VectorFst< GallicArc<A, STRING_LEFT> > gfst;
diff --git a/src/include/fst/partition.h b/src/include/fst/partition.h
index dcee67b..40b849a 100644
--- a/src/include/fst/partition.h
+++ b/src/include/fst/partition.h
@@ -43,8 +43,8 @@ class Partition {
friend class PartitionIterator<T>;
struct Element {
- Element() : value(0), next(0), prev(0) {}
- Element(T v) : value(v), next(0), prev(0) {}
+ Element() : value(0), next(0), prev(0) {}
+ Element(T v) : value(v), next(0), prev(0) {}
T value;
Element* next;
@@ -52,9 +52,11 @@ class Partition {
};
public:
- Partition() {}
+ Partition(bool allow_repeated_split):
+ allow_repeated_split_(allow_repeated_split) {}
- Partition(T num_states) {
+ Partition(bool allow_repeated_split, T num_states):
+ allow_repeated_split_(allow_repeated_split) {
Initialize(num_states);
}
@@ -137,16 +139,16 @@ class Partition {
if (class_size_[class_id] == 1) return;
// first time class is split
- if (split_size_[class_id] == 0)
+ if (split_size_[class_id] == 0) {
visited_classes_.push_back(class_id);
-
+ class_split_[class_id] = classes_[class_id];
+ }
// increment size of split (set of element at head of chain)
split_size_[class_id]++;
-
+
// update split point
- if (class_split_[class_id] == 0)
- class_split_[class_id] = classes_[class_id];
- if (class_split_[class_id] == elements_[element_id])
+ if (class_split_[class_id] != 0
+ && class_split_[class_id] == elements_[element_id])
class_split_[class_id] = elements_[element_id]->next;
// move to head of chain in same class
@@ -157,24 +159,31 @@ class Partition {
// class indices of the newly created class. Returns the new_class id
// or -1 if no new class was created.
T SplitRefine(T class_id) {
+
+ Element* split_el = class_split_[class_id];
// only split if necessary
- if (class_size_[class_id] == split_size_[class_id]) {
- class_split_[class_id] = 0;
+ //if (class_size_[class_id] == split_size_[class_id]) {
+ if(split_el == NULL) { // we split on everything...
split_size_[class_id] = 0;
return -1;
} else {
-
T new_class = AddClass();
+
+ if(allow_repeated_split_) { // split_size_ is possibly
+ // inaccurate, so work it out exactly.
+ size_t split_count; Element *e;
+ for(split_count=0,e=classes_[class_id];
+ e != split_el; split_count++, e=e->next);
+ split_size_[class_id] = split_count;
+ }
size_t remainder = class_size_[class_id] - split_size_[class_id];
if (remainder < split_size_[class_id]) { // add smaller
- Element* split_el = class_split_[class_id];
classes_[new_class] = split_el;
- class_size_[class_id] = split_size_[class_id];
- class_size_[new_class] = remainder;
split_el->prev->next = 0;
split_el->prev = 0;
+ class_size_[class_id] = split_size_[class_id];
+ class_size_[new_class] = remainder;
} else {
- Element* split_el = class_split_[class_id];
classes_[new_class] = classes_[class_id];
class_size_[class_id] = remainder;
class_size_[new_class] = split_size_[class_id];
@@ -245,10 +254,16 @@ class Partition {
vector<T> class_size_;
// size of split for each class
+ // in the nondeterministic case, split_size_ is actually an upper
+ // bound on the size of split for each class.
vector<T> split_size_;
// set of visited classes to be used in split refine
vector<T> visited_classes_;
+
+ // true if input fst was deterministic: we can make
+ // certain assumptions in this case that speed up the algorithm.
+ bool allow_repeated_split_;
};
diff --git a/src/include/fst/state-reachable.h b/src/include/fst/state-reachable.h
index 6d0c971..1da922e 100644
--- a/src/include/fst/state-reachable.h
+++ b/src/include/fst/state-reachable.h
@@ -112,7 +112,7 @@ class IntervalReachVisitor {
void FinishState(StateId s, StateId p, const A *arc) {
if (index_ >= 0 && fst_.Final(s) != Weight::Zero()) {
vector<Interval> *intervals = (*isets_)[s].Intervals();
- (*intervals)[0].end = index_; // Update tree interval end
+ (*intervals)[0].end_ = index_; // Update tree interval end
}
(*isets_)[s].Normalize();
if (p != kNoStateId)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment