Commit 1ee92df2 by Dan Povey

### Commit further changes to latgen paper, mostly cleanup.

git-svn-id: https://svn.code.sf.net/p/kaldi/code/trunk@517 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
parent 13886a9f
 ... @@ -157,9 +157,9 @@ than weights, where a cost is a floating point number that typically represents ... @@ -157,9 +157,9 @@ than weights, where a cost is a floating point number that typically represents log-probability. A WFST has a set of states with one distinguished log-probability. A WFST has a set of states with one distinguished start state\footnote{This is the formulation that corresponds best with the toolkit we use.}, start state\footnote{This is the formulation that corresponds best with the toolkit we use.}, each state has a final-cost (or $\infty$ for non-final states); each state has a final-cost (or $\infty$ for non-final states); and there is a set of arcs, where each arc $a$ has a previous-state $p[a]$, a next-state $n[a]$, a and there is a set of arcs, where each arc has a weight, weight $w[a]$ (just think of this as a cost for now), an input label $i[a]$ and an output weight (just think of this as a cost for now), an input label and an output label $o[a]$. In $\HCLG$, the input labels are the identifiers of context-dependent label. In $\HCLG$, the input labels are the identifiers of context-dependent HMM states, and the output labels represent words. For both the input and output HMM states, and the output labels represent words. For both the input and output symbols, the special label $\epsilon$ may appear meaning ``no label is present.'' symbols, the special label $\epsilon$ may appear meaning ``no label is present.'' ... @@ -219,7 +219,7 @@ be concerned with the accuracy of the information in the lattice (e.g. that the ... @@ -219,7 +219,7 @@ be concerned with the accuracy of the information in the lattice (e.g. that the scores and alignments are correct) and the completeness of such information (e.g. scores and alignments are correct) and the completeness of such information (e.g. that no high-scoring word-sequences are missing). The simplest that no high-scoring word-sequences are missing). 
The simplest way to formalize these concerns is to express them in terms of a lattice way to formalize these concerns is to express them in terms of a lattice pruning beam $\alpha > 0$ (interpret $\alpha$ as a natural logarithm). pruning beam $\alpha > 0$ (interpret this as a log likelihood difference). \begin{itemize} \begin{itemize} \item The lattice should have a path for every word sequence within $\alpha$ of the best-scoring one. \item The lattice should have a path for every word sequence within $\alpha$ of the best-scoring one. \item The scores and alignments in the lattice should be accurate. \item The scores and alignments in the lattice should be accurate. ... @@ -429,10 +429,10 @@ that we will describe below. ... @@ -429,10 +429,10 @@ that we will describe below. The resulting lattice $L$ is a deterministic, acyclic weighted acceptor with the The resulting lattice $L$ is a deterministic, acyclic weighted acceptor with the words as the labels, and the graph and acoustic costs and the alignments words as the labels, and the graph and acoustic costs and the alignments encoded into the weights. Of course, the costs and alignments are not in any encoded into the weights. The costs and alignments are not ``synchronized'' sense ``synchronized'' with the words. with the words. \section{Details of our $\epsilon$ removal and determinization algorithm} \section{Details of our determinization algorithm} \label{sec:details} \label{sec:details} We implemented $\epsilon$ removal and determinization as a single algorithm We implemented $\epsilon$ removal and determinization as a single algorithm ... ...
 ... @@ -16,7 +16,7 @@ ... @@ -16,7 +16,7 @@ year = 1997 year = 1997 } } @thesis{ odell_thesis, @phdthesis{ odell_thesis, title={The use of context in large vocabulary speech recognition}, title={The use of context in large vocabulary speech recognition}, author={Odell, J.J.}, author={Odell, J.J.}, year={1995}, year={1995}, ... @@ -31,7 +31,7 @@ ... @@ -31,7 +31,7 @@ } } @inproceedings{saon2005anatomy, @inproceedings{saon2005anatomy, title={Anatomy of an extremely fast LVCSR decoder}, title={{Anatomy of an extremely fast LVCSR decoder}}, author={Saon, G. and Povey, D. and Zweig, G.}, author={Saon, G. and Povey, D. and Zweig, G.}, booktitle={Ninth European Conference on Speech Communication and Technology}, booktitle={Ninth European Conference on Speech Communication and Technology}, year={2005} year={2005} ... @@ -397,10 +397,9 @@ ... @@ -397,10 +397,9 @@ @inproceedings{roark2011lexicographic, @inproceedings{roark2011lexicographic, title={Lexicographic semirings for exact automata encoding of sequence models}, title={Lexicographic semirings for exact automata encoding of sequence models}, author={Roark, B. and Sproat, R. and Shafran, I.}, author={Roark, B. and Sproat, R. and Shafran, I.}, booktitle={Proceedings of the 49th Annual Meeting of the Association for Computational Linguistics: Human Language Technologies: short papers-Volume 2}, booktitle={Proc. ACL-HLT, 2011, Portland, OR}, pages={1--5}, pages={1--5}, year={2011}, year={2011} organization={Association for Computational Linguistics} } } @Article{ @Article{ ... ...
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!