Commit 7e811a76 authored by Karel Vesely

trunk,nnet1: various enhancements, minor update of LSTM code,

- lstm : implemented gradient clipping, extended the gradient print to show stats of the forward / backward pass (a sketch of the clipping idea follows below).
- nnet-loss.cc : bugfix in Xent::Eval (avoiding log(0) in the cross-entropy computation; see the sketch after the nnet-loss.cc diff)
- nnet-pdf-priors.{h,cc},nnet-forward.cc : refactored, behavior stays the same (see the sketch after the nnet-pdf-prior.cc diff)
- train-transitions : updates the transition probabilities in the transition model (but there was no WER improvement)
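A minimal sketch of the gradient-clipping idea from the first item (not the actual Kaldi component code shown in the diff further below): every gradient buffer is clamped elementwise to [-clip, +clip] before the parameter update, which the component does with CuMatrix/CuVector ApplyFloor()/ApplyCeiling() calls. Here a plain std::vector stands in for those buffers.

// Sketch only: elementwise gradient clipping to [-clip, +clip].
#include <algorithm>
#include <vector>

void ClipGradient(std::vector<float> *grad, float clip) {
  if (clip <= 0.0f) return;  // clip_gradient_ == 0.0 disables clipping,
  for (float &g : *grad) {
    g = std::clamp(g, -clip, clip);  // floor at -clip, ceiling at +clip,
  }
}

int main() {
  std::vector<float> grad = { -7.5f, 0.3f, 12.0f };
  ClipGradient(&grad, 5.0f);  // -> { -5.0, 0.3, 5.0 }
  return 0;
}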




git-svn-id: https://svn.code.sf.net/p/kaldi/code/trunk@4904 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
parent 64e2290c
......@@ -19,4 +19,13 @@ cuda_cmd="queue.pl -l arch=*64 -l gpu=1"
# you avoid the latency of starting GridEngine jobs.
# BUT cluster:
host=$(hostname)
if [ ${host#*.} == "fit.vutbr.cz" ]; then
queue="all.q@@blade,all.q@@speech"
gpu_queue="long.q@supergpu*,long.q@dellgpu*,long.q@pcspeech-gpu,long.q@pcgpu*"
storage="matylda5"
export train_cmd="queue.pl -q $queue -l ram_free=1500M,mem_free=1500M,${storage}=1"
export decode_cmd="queue.pl -q $queue -l ram_free=2500M,mem_free=2500M,${storage}=0.5"
export cuda_cmd="queue.pl -q $gpu_queue -l gpu=1"
fi
......@@ -10,20 +10,18 @@
#export decode_cmd=run.pl
#export cuda_cmd=run.pl
#JHU cluster:
export train_cmd="queue.pl"
export decode_cmd="queue.pl --mem 3G"
export cuda_cmd="queue.pl --gpu 1"
# BUT cluster:
#export train_cmd="queue.pl -q all.q@blade[01][0126789][123456789] -l ram_free=2500M,mem_free=2500M,matylda5=0.5"
#export decode_cmd="queue.pl -q all.q@blade[01][0126789][123456789] -l ram_free=3000M,mem_free=3000M,matylda5=0.1"
#export cuda_cmd="queue.pl -q long.q@pcspeech-gpu -l gpu=1"
#
#a) JHU cluster options
# JHU cluster:
export train_cmd="queue.pl -l arch=*64*"
export decode_cmd="queue.pl -l arch=*64* -l ram_free=4G,mem_free=4G"
export cuda_cmd="queue.pl -l arch=*64*,gpu=1 -q g.q"
export mkgraph_cmd="queue.pl -l arch=*64* -l ram_free=4G,mem_free=4G"
# BUT cluster:
host=$(hostname)
if [ ${host#*.} == "fit.vutbr.cz" ]; then
queue="all.q@@blade,all.q@@speech"
gpu_queue="long.q@supergpu*,long.q@dellgpu*,long.q@pcspeech-gpu,long.q@pcgpu*"
storage="matylda5"
export train_cmd="queue.pl -q $queue -l ram_free=1500M,mem_free=1500M,${storage}=1"
export decode_cmd="queue.pl -q $queue -l ram_free=2500M,mem_free=2500M,${storage}=0.5"
export cuda_cmd="queue.pl -q $gpu_queue -l gpu=1"
fi
......@@ -43,9 +43,10 @@ if [ $stage -le 1 ]; then
# Train
$cuda_cmd $dir/log/train_nnet.log \
steps/nnet/train.sh --network-type lstm --learn-rate 0.0001 \
steps/nnet/train.sh --network-type lstm --learn-rate 0.00001 \
--cmvn-opts "--norm-means=true --norm-vars=true" --feat-type plain --splice 0 \
--train-opts "--momentum 0.9 --halving-factor 0.8" \
--proto-opts "--clip-gradient 5.0" \
--train-opts "--momentum 0.9 --halving-factor 0.65" \
--train-tool "nnet-train-lstm-streams --num-stream=4 --targets-delay=5" \
${train}_tr90 ${train}_cv10 data/lang $ali $ali $dir || exit 1;
......
......@@ -34,6 +34,8 @@ parser.add_option('--lstm-stddev-factor', dest='lstm_stddev_factor', type='float
help='Standard deviation of initialization [default: %default]');
parser.add_option('--param-stddev-factor', dest='param_stddev_factor', type='float', default=0.04,
help='Standard deviation in output layer [default: %default]');
parser.add_option('--clip-gradient', dest='clip_gradient', type='float', default=5.0,
help='Clipping constant applied to gradients [default: %default]');
#
(o,args) = parser.parse_args()
if len(args) != 2 :
......@@ -51,8 +53,8 @@ if len(args) != 2 :
#</NnetProto>
print "<NnetProto>"
print "<LstmProjectedStreams> <InputDim> %d <OutputDim> %d <CellDim> %s <ParamScale> %f" % \
(feat_dim, o.num_recurrent, o.num_cells, o.lstm_stddev_factor)
print "<LstmProjectedStreams> <InputDim> %d <OutputDim> %d <CellDim> %s <ParamScale> %f <ClipGradient> %f" % \
(feat_dim, o.num_recurrent, o.num_cells, o.lstm_stddev_factor, o.clip_gradient)
print "<AffineTransform> <InputDim> %d <OutputDim> %d <BiasMean> 0.0 <BiasRange> 0.0 <ParamStddev> %f" % \
(o.num_recurrent, num_leaves, o.param_stddev_factor)
print "<Softmax> <InputDim> %d <OutputDim> %d" % \
......
......@@ -2008,6 +2008,39 @@ Real CuMatrixBase<Real>::Sum() const {
return row_sum.Sum();
}
template<typename Real>
Real CuMatrixBase<Real>::Max() const {
Timer tim;
// TODO rewrite in CUDA,
Matrix<Real> tmp(NumRows(), NumCols(), kUndefined);
CopyToMat(&tmp);
Real ans = tmp.Max();
#if HAVE_CUDA == 1
if (CuDevice::Instantiate().Enabled()) {
CuDevice::Instantiate().AccuProfile(__func__, tim.Elapsed());
}
#endif
return ans;
}
template<typename Real>
Real CuMatrixBase<Real>::Min() const {
Timer tim;
// TODO rewrite in CUDA,
Matrix<Real> tmp(NumRows(), NumCols(), kUndefined);
CopyToMat(&tmp);
Real ans = tmp.Min();
#if HAVE_CUDA == 1
if (CuDevice::Instantiate().Enabled()) {
CuDevice::Instantiate().AccuProfile(__func__, tim.Elapsed());
}
#endif
return ans;
}
template<typename Real>
Real CuMatrixBase<Real>::Trace(bool check_square) const {
#if HAVE_CUDA == 1
......
......@@ -417,6 +417,8 @@ class CuMatrixBase {
}
Real Sum() const;
Real Max() const; ///< proxy to MatrixBase::Max(), cuda not used
Real Min() const; ///< proxy to MatrixBase::Min(), cuda not used
/// Return the trace. If check_square = true, will crash if matrix is not square.
Real Trace(bool check_square = true) const;
......
......@@ -320,6 +320,25 @@ MatrixIndexT CuVectorBase<Real>::ApplyFloor(Real floor_val) {
}
template<typename Real>
void CuVectorBase<Real>::ApplyCeiling(Real ceiling_val) {
#if HAVE_CUDA == 1
if (CuDevice::Instantiate().Enabled()) {
if (dim_ == 0) return;
Timer tim;
dim3 dimBlock(CU1DBLOCK, 1);
dim3 dimGrid(n_blocks(Dim(), CU1DBLOCK), 1);
MatrixDim pseudo_matrix_dim = { 1, Dim(), Dim() }; // vector is a matrix with 1 row,
cuda_apply_ceiling(dimGrid, dimBlock, data_, ceiling_val, pseudo_matrix_dim);
CU_SAFE_CALL(cudaGetLastError());
CuDevice::Instantiate().AccuProfile("CuVectorBase::ApplyCeiling", tim.Elapsed());
} else
#endif
{
Vec().ApplyCeiling(ceiling_val);
}
}
template<typename Real>
void CuVectorBase<Real>::ApplyPow(Real power) {
#if HAVE_CUDA == 1
......
......@@ -122,6 +122,7 @@ class CuVectorBase {
void ApplyExp();
void ApplyLog();
MatrixIndexT ApplyFloor(Real floor_val);
void ApplyCeiling(Real ceiling_val);
void ApplyPow(Real power);
Real Sum() const;
void SetRandn();
......
......@@ -65,8 +65,13 @@ void Xent::Eval(const VectorBase<BaseFloat> &frame_weights,
KALDI_ASSERT(net_out.NumCols() == target.NumCols());
KALDI_ASSERT(net_out.NumRows() == target.NumRows());
KALDI_ASSERT(net_out.NumRows() == frame_weights.Dim());
diff->Resize(net_out.NumRows(), net_out.NumCols());
KALDI_ASSERT(KALDI_ISFINITE(frame_weights.Sum()));
KALDI_ASSERT(KALDI_ISFINITE(net_out.Sum()));
KALDI_ASSERT(KALDI_ISFINITE(target.Sum()));
double num_frames = frame_weights.Sum();
KALDI_ASSERT(num_frames >= 0.0);
// get frame_weights to GPU,
frame_weights_ = frame_weights;
......@@ -84,6 +89,7 @@ void Xent::Eval(const VectorBase<BaseFloat> &frame_weights,
// calculate cross_entropy (in GPU),
xentropy_aux_ = net_out; // y
xentropy_aux_.Add(1e-20); // avoid log(0)
xentropy_aux_.ApplyLog(); // log(y)
xentropy_aux_.MulElements(target); // t*log(y)
xentropy_aux_.MulRowsVec(frame_weights_); // w*t*log(y)
......@@ -97,6 +103,9 @@ void Xent::Eval(const VectorBase<BaseFloat> &frame_weights,
entropy_aux_.MulRowsVec(frame_weights_); // w*t*log(t)
double entropy = -entropy_aux_.Sum();
KALDI_ASSERT(KALDI_ISFINITE(cross_entropy));
KALDI_ASSERT(KALDI_ISFINITE(entropy));
loss_ += cross_entropy;
entropy_ += entropy;
correct_ += correct;
......@@ -109,7 +118,9 @@ void Xent::Eval(const VectorBase<BaseFloat> &frame_weights,
loss_progress_ += cross_entropy;
entropy_progress_ += entropy;
if (frames_progress_ > progress_step) {
KALDI_VLOG(1) << "ProgressLoss[" << frames_progress_/100/3600 << "h/" << frames_/100/3600 << "h]: "
KALDI_VLOG(1) << "ProgressLoss[last "
<< static_cast<int>(frames_progress_/100/3600) << "h of "
<< static_cast<int>(frames_/100/3600) << "h]: "
<< (loss_progress_-entropy_progress_)/frames_progress_ << " (Xent)";
// store
loss_vec_.push_back((loss_progress_-entropy_progress_)/frames_progress_);
......@@ -161,9 +172,17 @@ void Mse::Eval(const VectorBase<BaseFloat> &frame_weights,
const CuMatrixBase<BaseFloat>& net_out,
const CuMatrixBase<BaseFloat>& target,
CuMatrix<BaseFloat>* diff) {
// check inputs,
KALDI_ASSERT(net_out.NumCols() == target.NumCols());
KALDI_ASSERT(net_out.NumRows() == target.NumRows());
KALDI_ASSERT(net_out.NumRows() == frame_weights.Dim());
KALDI_ASSERT(KALDI_ISFINITE(frame_weights.Sum()));
KALDI_ASSERT(KALDI_ISFINITE(net_out.Sum()));
KALDI_ASSERT(KALDI_ISFINITE(target.Sum()));
int32 num_frames = frame_weights.Sum();
KALDI_ASSERT(num_frames >= 0.0);
// get frame_weights to GPU,
frame_weights_ = frame_weights;
......@@ -179,17 +198,21 @@ void Mse::Eval(const VectorBase<BaseFloat> &frame_weights,
diff_pow_2_.MulRowsVec(frame_weights_); // w*(y - t)^2
double mean_square_error = 0.5 * diff_pow_2_.Sum(); // sum the matrix,
KALDI_ASSERT(KALDI_ISFINITE(mean_square_error));
// accumulate
loss_ += mean_square_error;
frames_ += num_frames;
// progressive loss reporting
{
static const int32 progress_step = 1e6; // 2.77h
static const int32 progress_step = 3600*100; // 1h
frames_progress_ += num_frames;
loss_progress_ += mean_square_error;
if (frames_progress_ > progress_step) {
KALDI_VLOG(1) << "ProgressLoss[" << frames_progress_/100/3600 << "h/" << frames_/100/3600 << "h]: "
KALDI_VLOG(1) << "ProgressLoss[last "
<< static_cast<int>(frames_progress_/100/3600) << "h of "
<< static_cast<int>(frames_/100/3600) << "h]: "
<< loss_progress_/frames_progress_ << " (Mse)";
// store
loss_vec_.push_back(loss_progress_/frames_progress_);
......
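For reference, a small self-contained sketch (not the Kaldi implementation) of the weighted cross-entropy term that Xent::Eval accumulates above, including the 1e-20 offset that avoids log(0) when a softmax output is exactly zero; plain nested std::vector replaces the CuMatrix buffers:

// Sketch only: xent = -sum_t w_t * sum_k target[t][k] * log(output[t][k] + 1e-20)
#include <cmath>
#include <vector>

double WeightedCrossEntropy(const std::vector<std::vector<double> > &output,
                            const std::vector<std::vector<double> > &target,
                            const std::vector<double> &frame_weights) {
  double xent = 0.0;
  for (size_t t = 0; t < output.size(); t++) {
    for (size_t k = 0; k < output[t].size(); k++) {
      // the 1e-20 offset keeps log() finite when the posterior is 0,
      xent -= frame_weights[t] * target[t][k] * std::log(output[t][k] + 1e-20);
    }
  }
  return xent;
}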
......@@ -47,8 +47,9 @@ public:
UpdatableComponent(input_dim, output_dim),
ncell_(0),
nrecur_(output_dim),
//dropout_rate_(0.0),
nstream_(0)
nstream_(0),
clip_gradient_(0.0)
//, dropout_rate_(0.0)
{ }
~LstmProjectedStreams()
......@@ -80,6 +81,8 @@ public:
ReadToken(is, false, &token);
if (token == "<CellDim>")
ReadBasicType(is, false, &ncell_);
else if (token == "<ClipGradient>")
ReadBasicType(is, false, &clip_gradient_);
//else if (token == "<DropoutRate>")
// ReadBasicType(is, false, &dropout_rate_);
else if (token == "<ParamScale>")
......@@ -120,11 +123,14 @@ public:
w_r_m_corr_.Resize(nrecur_, ncell_, kSetZero);
KALDI_ASSERT(clip_gradient_ >= 0.0);
}
void ReadData(std::istream &is, bool binary) {
ExpectToken(is, binary, "<CellDim>");
ReadBasicType(is, binary, &ncell_);
ExpectToken(is, binary, "<ClipGradient>");
ReadBasicType(is, binary, &clip_gradient_);
//ExpectToken(is, binary, "<DropoutRate>");
//ReadBasicType(is, binary, &dropout_rate_);
......@@ -153,6 +159,8 @@ public:
void WriteData(std::ostream &os, bool binary) const {
WriteToken(os, binary, "<CellDim>");
WriteBasicType(os, binary, ncell_);
WriteToken(os, binary, "<ClipGradient>");
WriteBasicType(os, binary, clip_gradient_);
//WriteToken(os, binary, "<DropoutRate>");
//WriteBasicType(os, binary, dropout_rate_);
......@@ -218,14 +226,53 @@ public:
}
std::string InfoGradient() const {
// disassemble forward-propagation buffer into different neurons,
const CuSubMatrix<BaseFloat> YG(propagate_buf_.ColRange(0*ncell_, ncell_));
const CuSubMatrix<BaseFloat> YI(propagate_buf_.ColRange(1*ncell_, ncell_));
const CuSubMatrix<BaseFloat> YF(propagate_buf_.ColRange(2*ncell_, ncell_));
const CuSubMatrix<BaseFloat> YO(propagate_buf_.ColRange(3*ncell_, ncell_));
const CuSubMatrix<BaseFloat> YC(propagate_buf_.ColRange(4*ncell_, ncell_));
const CuSubMatrix<BaseFloat> YH(propagate_buf_.ColRange(5*ncell_, ncell_));
const CuSubMatrix<BaseFloat> YM(propagate_buf_.ColRange(6*ncell_, ncell_));
const CuSubMatrix<BaseFloat> YR(propagate_buf_.ColRange(7*ncell_, nrecur_));
// disassemble backpropagate buffer into different neurons,
const CuSubMatrix<BaseFloat> DG(backpropagate_buf_.ColRange(0*ncell_, ncell_));
const CuSubMatrix<BaseFloat> DI(backpropagate_buf_.ColRange(1*ncell_, ncell_));
const CuSubMatrix<BaseFloat> DF(backpropagate_buf_.ColRange(2*ncell_, ncell_));
const CuSubMatrix<BaseFloat> DO(backpropagate_buf_.ColRange(3*ncell_, ncell_));
const CuSubMatrix<BaseFloat> DC(backpropagate_buf_.ColRange(4*ncell_, ncell_));
const CuSubMatrix<BaseFloat> DH(backpropagate_buf_.ColRange(5*ncell_, ncell_));
const CuSubMatrix<BaseFloat> DM(backpropagate_buf_.ColRange(6*ncell_, ncell_));
const CuSubMatrix<BaseFloat> DR(backpropagate_buf_.ColRange(7*ncell_, nrecur_));
return std::string(" ") +
"\n w_gifo_x_corr_ " + MomentStatistics(w_gifo_x_corr_) +
"\n w_gifo_r_corr_ " + MomentStatistics(w_gifo_r_corr_) +
"\n bias_corr_ " + MomentStatistics(bias_corr_) +
"\n peephole_i_c_corr_ " + MomentStatistics(peephole_i_c_corr_) +
"\n peephole_f_c_corr_ " + MomentStatistics(peephole_f_c_corr_) +
"\n peephole_o_c_corr_ " + MomentStatistics(peephole_o_c_corr_) +
"\n w_r_m_corr_ " + MomentStatistics(w_r_m_corr_);
"\n Gradients:" +
"\n w_gifo_x_corr_ " + MomentStatistics(w_gifo_x_corr_) +
"\n w_gifo_r_corr_ " + MomentStatistics(w_gifo_r_corr_) +
"\n bias_corr_ " + MomentStatistics(bias_corr_) +
"\n peephole_i_c_corr_ " + MomentStatistics(peephole_i_c_corr_) +
"\n peephole_f_c_corr_ " + MomentStatistics(peephole_f_c_corr_) +
"\n peephole_o_c_corr_ " + MomentStatistics(peephole_o_c_corr_) +
"\n w_r_m_corr_ " + MomentStatistics(w_r_m_corr_) +
"\n Forward-pass:" +
"\n YG " + MomentStatistics(YG) +
"\n YI " + MomentStatistics(YI) +
"\n YF " + MomentStatistics(YF) +
"\n YC " + MomentStatistics(YC) +
"\n YH " + MomentStatistics(YH) +
"\n YO " + MomentStatistics(YO) +
"\n YM " + MomentStatistics(YM) +
"\n YR " + MomentStatistics(YR) +
"\n Backward-pass:" +
"\n DG " + MomentStatistics(DG) +
"\n DI " + MomentStatistics(DI) +
"\n DF " + MomentStatistics(DF) +
"\n DC " + MomentStatistics(DC) +
"\n DH " + MomentStatistics(DH) +
"\n DO " + MomentStatistics(DO) +
"\n DM " + MomentStatistics(DM) +
"\n DR " + MomentStatistics(DR);
}
void ResetLstmStreams(const std::vector<int32> &stream_reset_flag) {
......@@ -522,7 +569,24 @@ public:
w_r_m_corr_.AddMatMat(1.0, DR.RowRange(1*S,T*S), kTrans,
YM.RowRange(1*S,T*S), kNoTrans, mmt);
if (clip_gradient_ > 0.0) {
w_gifo_x_corr_.ApplyFloor(-clip_gradient_);
w_gifo_x_corr_.ApplyCeiling(clip_gradient_);
w_gifo_r_corr_.ApplyFloor(-clip_gradient_);
w_gifo_r_corr_.ApplyCeiling(clip_gradient_);
bias_corr_.ApplyFloor(-clip_gradient_);
bias_corr_.ApplyCeiling(clip_gradient_);
w_r_m_corr_.ApplyFloor(-clip_gradient_);
w_r_m_corr_.ApplyCeiling(clip_gradient_);
peephole_i_c_corr_.ApplyFloor(-clip_gradient_);
peephole_i_c_corr_.ApplyCeiling(clip_gradient_);
peephole_f_c_corr_.ApplyFloor(-clip_gradient_);
peephole_f_c_corr_.ApplyCeiling(clip_gradient_);
peephole_o_c_corr_.ApplyFloor(-clip_gradient_);
peephole_o_c_corr_.ApplyCeiling(clip_gradient_);
}
if (DEBUG) {
std::cerr << "gradients(with optional momentum): \n";
std::cerr << "w_gifo_x_corr_ " << w_gifo_x_corr_;
......@@ -619,6 +683,9 @@ private:
CuMatrix<BaseFloat> prev_nnet_state_;
// gradient-clipping value,
BaseFloat clip_gradient_;
// non-recurrent dropout
//BaseFloat dropout_rate_;
//CuMatrix<BaseFloat> dropout_mask_;
......
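The extended InfoGradient() print above summarizes each forward/backward buffer with Kaldi's MomentStatistics() helper. A rough, self-contained sketch of that kind of per-buffer summary (mean, standard deviation, min, max; the real helper may report further moments) could look like this, with std::vector standing in for CuSubMatrix:

// Sketch only: mean / stddev / min / max summary of a non-empty buffer.
#include <algorithm>
#include <cmath>
#include <sstream>
#include <string>
#include <vector>

std::string BufferStats(const std::vector<float> &v) {
  double sum = 0.0, sum_sq = 0.0;
  for (float x : v) { sum += x; sum_sq += x * x; }
  double mean = sum / v.size();
  double var = sum_sq / v.size() - mean * mean;  // may dip below 0 due to rounding,
  std::ostringstream os;
  os << "mean " << mean
     << ", stddev " << std::sqrt(std::max(var, 0.0))
     << ", min " << *std::min_element(v.begin(), v.end())
     << ", max " << *std::max_element(v.begin(), v.end());
  return os.str();
}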
......@@ -26,56 +26,47 @@ namespace nnet1 {
PdfPrior::PdfPrior(const PdfPriorOptions &opts)
: prior_scale_(opts.prior_scale) {
if (opts.class_frame_counts == "") {
//Empty file with counts is not an error,
//there are cases when PdfPrior is not active
//(example: nnet-forward over feature transform, bn-feature extractor)
// class_frame_counts is empty, the PdfPrior is deactivated...
// (for example when 'nnet-forward' generates bottleneck features)
return;
//KALDI_ERR << "--class-frame-counts is empty: Cannot initialize priors "
// << "without the counts.";
}
Vector<double> tmp_priors;
KALDI_LOG << "Computing pdf-priors from : " << opts.class_frame_counts;
Vector<double> frame_counts, rel_freq, log_priors;
{
Input in;
in.OpenTextMode(opts.class_frame_counts);
tmp_priors.Read(in.Stream(), false);
frame_counts.Read(in.Stream(), false);
in.Close();
}
int32 prior_dim = tmp_priors.Dim();
Vector<BaseFloat> tmp_mask(prior_dim, kSetZero);
int32 num_cutoff = 0;
for (int32 i = 0; i < prior_dim; i++) {
if (tmp_priors(i) < opts.prior_cutoff) {
tmp_priors(i) = opts.prior_cutoff;
tmp_mask(i) = FLT_MAX/2; // not using -kLogZeroFloat to prevent NANs
num_cutoff++;
}
}
if (num_cutoff > 0) {
KALDI_WARN << num_cutoff << " out of " << prior_dim << " classes have counts"
<< " lower than " << opts.prior_cutoff;
}
double sum = tmp_priors.Sum();
tmp_priors.Scale(1.0 / sum);
tmp_priors.ApplyLog();
for (int32 i = 0; i < prior_dim; i++) {
KALDI_ASSERT(tmp_priors(i) != kLogZeroDouble);
}
// get relative frequencies,
rel_freq = frame_counts;
rel_freq.Scale(1.0/frame_counts.Sum());
// get the log-prior,
log_priors = rel_freq;
log_priors.Add(1e-20);
log_priors.ApplyLog();
// Make the priors for classes with low counts +inf (i.e. -log(0)) such that
// the classes have 0 likelihood (i.e. -inf log-likelihood). We use FLT_MAX/2
// instead of -kLogZeroFloat to prevent NANs from appearing in computation.
Vector<BaseFloat> tmp_priors_f(tmp_priors);
tmp_priors_f.AddVec(1.0, tmp_mask);
int32 num_floored = 0;
for (int32 i=0; i<log_priors.Dim(); i++) {
if (rel_freq(i) < opts.prior_floor) {
log_priors(i) = FLT_MAX/2;
num_floored++;
}
}
KALDI_LOG << "Floored " << num_floored << " pdf-priors";
// sanity check,
KALDI_ASSERT(KALDI_ISFINITE(log_priors.Sum()));
// push priors to GPU
log_priors_.Resize(prior_dim);
log_priors_.CopyFromVec(tmp_priors_f);
// push to GPU,
log_priors_ = Vector<BaseFloat>(log_priors);
}
......
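A compact sketch (again not the Kaldi code) of what the refactored PdfPrior construction above computes: normalize the class frame counts to relative frequencies, take the log with a small offset, and floor classes below --prior-floor to a huge positive value, so that subtracting prior_scale * log_prior later pushes their log-likelihood towards -inf:

// Sketch only: class frame counts -> log-priors with flooring of rare classes.
#include <cfloat>
#include <cmath>
#include <vector>

std::vector<double> ComputeLogPriors(const std::vector<double> &frame_counts,
                                     double prior_floor = 1e-10) {
  double total = 0.0;
  for (double c : frame_counts) total += c;
  std::vector<double> log_priors(frame_counts.size());
  for (size_t i = 0; i < frame_counts.size(); i++) {
    double rel_freq = frame_counts[i] / total;
    if (rel_freq < prior_floor) {
      // rare class: huge positive log-prior, so the class gets ~zero likelihood
      // once the scaled log-prior is subtracted from the log-posterior,
      log_priors[i] = FLT_MAX / 2;
    } else {
      log_priors[i] = std::log(rel_freq + 1e-20);
    }
  }
  return log_priors;
}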
......@@ -35,11 +35,11 @@ namespace nnet1 {
struct PdfPriorOptions {
std::string class_frame_counts;
BaseFloat prior_scale;
BaseFloat prior_cutoff;
BaseFloat prior_floor;
PdfPriorOptions() : class_frame_counts(""),
prior_scale(1.0),
prior_cutoff(1e-10) {}
prior_floor(1e-10) {}
void Register(OptionsItf *po) {
po->Register("class-frame-counts", &class_frame_counts,
......@@ -48,8 +48,8 @@ struct PdfPriorOptions {
" or pre-softmax activations)");
po->Register("prior-scale", &prior_scale,
"Scaling factor to be applied on pdf-log-priors");
po->Register("prior-cutoff", &prior_cutoff,
"Classes with priors lower than cutoff will have 0 likelihood");
po->Register("prior-floor", &prior_floor,
"Flooring constatnt for prior probability (i.e. label rel. frequency)");
}
};
......
......@@ -15,7 +15,7 @@ BINFILES = nnet-train-frmshuff \
nnet-forward nnet-copy nnet-info nnet-concat \
transf-to-nnet cmvn-to-nnet nnet-initialize \
nnet-kl-hmm-acc nnet-kl-hmm-mat-to-component \
feat-to-post paste-post
feat-to-post paste-post train-transitions
OBJFILES =
......
......@@ -85,29 +85,26 @@ int main(int argc, char *argv[]) {
Nnet nnet;
nnet.Read(model_filename);
//optionally remove softmax
if (no_softmax && nnet.GetComponent(nnet.NumComponents()-1).GetType() ==
kaldi::nnet1::Component::kSoftmax) {
KALDI_LOG << "Removing softmax from the nnet " << model_filename;
nnet.RemoveComponent(nnet.NumComponents()-1);
// optionally remove softmax,
Component::ComponentType last_type = nnet.GetComponent(nnet.NumComponents()-1).GetType();
if (no_softmax) {
if (last_type == Component::kSoftmax || last_type == Component::kBlockSoftmax) {
KALDI_LOG << "Removing " << Component::TypeToMarker(last_type) << " from the nnet " << model_filename;
nnet.RemoveComponent(nnet.NumComponents()-1);
} else {
KALDI_WARN << "Cannot remove softmax using --no-softmax=true, as the last component is " << Component::TypeToMarker(last_type);
}
}
//check for some non-sense option combinations
// avoid some bad option combinations,
if (apply_log && no_softmax) {
KALDI_ERR << "Nonsense option combination : --apply-log=true and --no-softmax=true";
}
if (apply_log && nnet.GetComponent(nnet.NumComponents()-1).GetType() !=
kaldi::nnet1::Component::kSoftmax) {
KALDI_ERR << "Used --apply-log=true, but nnet " << model_filename
<< " does not have <softmax> as last component!";
}
PdfPrior pdf_prior(prior_opts);
if (prior_opts.class_frame_counts != "" && (!no_softmax && !apply_log)) {
KALDI_ERR << "Option --class-frame-counts has to be used together with "
<< "--no-softmax or --apply-log";
KALDI_ERR << "Cannot use both --apply-log=true --no-softmax=true, use only one of the two!";
}
// disable dropout
// we will subtract log-priors later,
PdfPrior pdf_prior(prior_opts);
// disable dropout,
nnet_transf.SetDropoutRetention(1.0);
nnet.SetDropoutRetention(1.0);
......@@ -127,14 +124,14 @@ int main(int argc, char *argv[]) {
for (; !feature_reader.Done(); feature_reader.Next()) {
// read
Matrix<BaseFloat> mat = feature_reader.Value();
std::string utt = feature_reader.Key();
KALDI_VLOG(2) << "Processing utterance " << num_done+1
<< ", " << feature_reader.Key()
<< ", " << utt
<< ", " << mat.NumRows() << "frm";
//check for NaN/inf
BaseFloat sum = mat.Sum();
if (!KALDI_ISFINITE(sum)) {
KALDI_ERR << "NaN or inf found in features of " << feature_reader.Key();
if (!KALDI_ISFINITE(mat.Sum())) { // check there's no nan/inf,
KALDI_ERR << "NaN or inf found in features for " << utt;
}
// time-shift, copy the last frame of LSTM input N-times,
......@@ -146,23 +143,43 @@ int main(int argc, char *argv[]) {
}
}
// push it to gpu
// push it to gpu,
feats = mat;
// fwd-pass
// fwd-pass, feature transform,
nnet_transf.Feedforward(feats, &feats_transf);
if (!KALDI_ISFINITE(feats_transf.Sum())) { // check there's no nan/inf,
KALDI_ERR << "NaN or inf found in transformed-features for " << utt;
}
// fwd-pass, nnet,
nnet.Feedforward(feats_transf, &nnet_out);
if (!KALDI_ISFINITE(nnet_out.Sum())) { // check there's no nan/inf,
KALDI_ERR << "NaN or inf found in nn-output for " << utt;
}
// convert posteriors to log-posteriors
// convert posteriors to log-posteriors,
if (apply_log) {