Commit 13c78d3b authored by Karel Vesely's avatar Karel Vesely
Browse files

Merge pull request #66 from nichongjia/blstm

blstm remove bug
parents 9c257c5a 7afae3f8
......@@ -58,17 +58,17 @@ print "<NnetProto>"
# normally we won't use more than 2 layers of LSTM
if o.num_layers == 1:
print "<BLstmProjectedStreams> <InputDim> %d <OutputDim> %d <CellDim> %s <ParamScale> %f <ClipGradient> %f" % \
(feat_dim, o.num_recurrent, o.num_cells, o.lstm_stddev_factor, o.clip_gradient)
(feat_dim, 2*o.num_recurrent, o.num_cells, o.lstm_stddev_factor, o.clip_gradient)
elif o.num_layers == 2:
print "<BLstmProjectedStreams> <InputDim> %d <OutputDim> %d <CellDim> %s <ParamScale> %f <ClipGradient> %f" % \
(feat_dim, o.num_recurrent, o.num_cells, o.lstm_stddev_factor, o.clip_gradient)
(feat_dim, 2*o.num_recurrent, o.num_cells, o.lstm_stddev_factor, o.clip_gradient)
print "<BLstmProjectedStreams> <InputDim> %d <OutputDim> %d <CellDim> %s <ParamScale> %f <ClipGradient> %f" % \
(o.num_recurrent, o.num_recurrent, o.num_cells, o.lstm_stddev_factor, o.clip_gradient)
(2*o.num_recurrent, 2*o.num_recurrent, o.num_cells, o.lstm_stddev_factor, o.clip_gradient)
else:
sys.stderr.write("make_lstm_proto.py ERROR: more than 2 layers of LSTM, not supported yet.\n")
sys.exit(1)
print "<AffineTransform> <InputDim> %d <OutputDim> %d <BiasMean> 0.0 <BiasRange> 0.0 <ParamStddev> %f" % \
(o.num_recurrent, num_leaves, o.param_stddev_factor)
(2*o.num_recurrent, num_leaves, o.param_stddev_factor)
print "<Softmax> <InputDim> %d <OutputDim> %d" % \
(num_leaves, num_leaves)
print "</NnetProto>"
......
This diff is collapsed.
......@@ -120,7 +120,7 @@ Component* Component::NewComponentOfType(ComponentType comp_type,
ans = new LstmProjectedStreams(input_dim, output_dim);
break;
case Component::kBLstmProjectedStreams :
ans = new BLstmProjectedStreams(input_dim, output_dim);
ans = new BLstmProjectedStreams(input_dim, output_dim);
break;
case Component::kSoftmax :
ans = new Softmax(input_dim, output_dim);
......
......@@ -91,7 +91,7 @@ class Component {
/// Convert component type to marker
static const char* TypeToMarker(ComponentType t);
/// Convert marker to component type (case insensitive)
static ComponentType MarkerToType(const std::string &s);
static ComponentType MarkerToType(const std::string &s);
/// General interface of a component
public:
......
......@@ -32,7 +32,7 @@ namespace nnet1 {
Nnet::Nnet(const Nnet& other) {
// copy the components
for(int32 i=0; i<other.NumComponents(); i++) {
for(int32 i = 0; i < other.NumComponents(); i++) {
components_.push_back(other.GetComponent(i).Copy());
}
// create empty buffers
......@@ -40,13 +40,13 @@ Nnet::Nnet(const Nnet& other) {
backpropagate_buf_.resize(NumComponents()+1);
// copy train opts
SetTrainOptions(other.opts_);
Check();
Check();
}
Nnet & Nnet::operator = (const Nnet& other) {
Destroy();
// copy the components
for(int32 i=0; i<other.NumComponents(); i++) {
for(int32 i = 0; i < other.NumComponents(); i++) {
components_.push_back(other.GetComponent(i).Copy());
}
// create empty buffers
......@@ -356,15 +356,19 @@ void Nnet::ResetLstmStreams(const std::vector<int32> &stream_reset_flag) {
if (GetComponent(c).GetType() == Component::kLstmProjectedStreams) {
LstmProjectedStreams& comp = dynamic_cast<LstmProjectedStreams&>(GetComponent(c));
comp.ResetLstmStreams(stream_reset_flag);
}
}
}
}
/// Hands the per-sequence lengths to every BLstmProjectedStreams component
/// in the network; components of any other type are left untouched.
/// @param sequence_lengths  forwarded unchanged to
///        BLstmProjectedStreams::SetSeqLengths().
void Nnet::SetSeqLengths(const std::vector<int32> &sequence_lengths) {
  for (int32 c = 0; c < NumComponents(); c++) {
    if (GetComponent(c).GetType() == Component::kBLstmProjectedStreams) {
      BLstmProjectedStreams& comp = dynamic_cast<BLstmProjectedStreams&>(GetComponent(c));
      // Only the sequence lengths are set here; the stale call to
      // ResetLstmStreams(stream_reset_flag) was dropped because that
      // variable does not exist in this function.
      comp.SetSeqLengths(sequence_lengths);
    }
  }
}
void Nnet::Init(const std::string &file) {
Input in(file);
std::istream &is = in.Stream();
......
......@@ -36,23 +36,23 @@ namespace nnet1 {
class Nnet {
public:
Nnet() {}
Nnet(const Nnet& other); // Copy constructor.
Nnet(const Nnet& other); // Copy constructor.
Nnet &operator = (const Nnet& other); // Assignment operator.
~Nnet();
~Nnet();
public:
/// Perform forward pass through the network
void Propagate(const CuMatrixBase<BaseFloat> &in, CuMatrix<BaseFloat> *out);
void Propagate(const CuMatrixBase<BaseFloat> &in, CuMatrix<BaseFloat> *out);
/// Perform backward pass through the network
void Backpropagate(const CuMatrixBase<BaseFloat> &out_diff, CuMatrix<BaseFloat> *in_diff);
/// Perform forward pass through the network, don't keep buffers (use it when not training)
void Feedforward(const CuMatrixBase<BaseFloat> &in, CuMatrix<BaseFloat> *out);
void Feedforward(const CuMatrixBase<BaseFloat> &in, CuMatrix<BaseFloat> *out);
/// Dimensionality on network input (input feature dim.)
int32 InputDim() const;
int32 InputDim() const;
/// Dimensionality of network outputs (posteriors | bn-features | etc.)
int32 OutputDim() const;
int32 OutputDim() const;
/// Returns number of components-- think of this as similar to # of layers, but
/// e.g. the nonlinearity and the linear part count as separate components,
......@@ -65,7 +65,7 @@ class Nnet {
/// Sets the c'th component to "component", taking ownership of the pointer
/// and deleting the corresponding one that we own.
void SetComponent(int32 c, Component *component);
/// Appends this component to the components already in the neural net.
/// Takes ownership of the pointer
void AppendComponent(Component *dynamically_allocated_comp);
......@@ -77,12 +77,12 @@ class Nnet {
void RemoveLastComponent() { RemoveComponent(NumComponents()-1); }
/// Access to forward pass buffers
const std::vector<CuMatrix<BaseFloat> >& PropagateBuffer() const {
return propagate_buf_;
const std::vector<CuMatrix<BaseFloat> >& PropagateBuffer() const {
return propagate_buf_;
}
/// Access to backward pass buffers
const std::vector<CuMatrix<BaseFloat> >& BackpropagateBuffer() const {
return backpropagate_buf_;
const std::vector<CuMatrix<BaseFloat> >& BackpropagateBuffer() const {
return backpropagate_buf_;
}
/// Get the number of parameters in the network
......@@ -96,22 +96,25 @@ class Nnet {
/// Get the gradient stored in the network
void GetGradient(Vector<BaseFloat>* grad_copy) const;
/// Set the dropout rate
/// Set the dropout rate
void SetDropoutRetention(BaseFloat r);
/// Reset streams in LSTM multi-stream training,
void ResetLstmStreams(const std::vector<int32> &stream_reset_flag);
/// set sequence length in LSTM multi-stream training
void SetSeqLengths(const std::vector<int32> &sequence_lengths);
/// Initialize MLP from config
void Init(const std::string &config_file);
/// Read the MLP from file (can add layers to exisiting instance of Nnet)
void Read(const std::string &file);
void Read(const std::string &file);
/// Read the MLP from stream (can add layers to exisiting instance of Nnet)
void Read(std::istream &in, bool binary);
void Read(std::istream &in, bool binary);
/// Write MLP to file
void Write(const std::string &file, bool binary) const;
/// Write MLP to stream
void Write(std::ostream &out, bool binary) const;
/// Write MLP to stream
void Write(std::ostream &out, bool binary) const;
/// Create string with human readable description of the nnet
std::string Info() const;
/// Create string with per-component gradient statistics
......@@ -135,18 +138,17 @@ class Nnet {
private:
/// Vector which contains all the components composing the neural network,
/// the components are for example: AffineTransform, Sigmoid, Softmax
std::vector<Component*> components_;
std::vector<Component*> components_;
std::vector<CuMatrix<BaseFloat> > propagate_buf_; ///< buffers for forward pass
std::vector<CuMatrix<BaseFloat> > backpropagate_buf_; ///< buffers for backward pass
std::vector<CuMatrix<BaseFloat> > propagate_buf_; ///< buffers for forward pass
std::vector<CuMatrix<BaseFloat> > backpropagate_buf_; ///< buffers for backward pass
/// Option class with hyper-parameters passed to UpdatableComponent(s)
NnetTrainOptions opts_;
};
} // namespace nnet1
} // namespace kaldi
} // namespace nnet1
} // namespace kaldi
#endif // KALDI_NNET_NNET_NNET_H_
......@@ -10,7 +10,7 @@ BINFILES = nnet-train-frmshuff \
nnet-train-perutt \
nnet-train-mmi-sequential \
nnet-train-mpe-sequential \
nnet-train-lstm-streams \
nnet-train-lstm-streams nnet-train-blstm-streams \
rbm-train-cd1-frmshuff rbm-convert-to-nnet \
nnet-forward nnet-copy nnet-info nnet-concat \
transf-to-nnet cmvn-to-nnet nnet-initialize \
......
// nnetbin/nnet-train-blstm-streams.cc
// Copyright 2015 Chongjia Ni
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#include "nnet/nnet-trnopts.h"
#include "nnet/nnet-nnet.h"
#include "nnet/nnet-loss.h"
#include "nnet/nnet-randomizer.h"
#include "base/kaldi-common.h"
#include "util/common-utils.h"
#include "base/timer.h"
#include "cudamatrix/cu-device.h"
int main(int argc, char *argv[]) {
using namespace kaldi;
using namespace kaldi::nnet1;
typedef kaldi::int32 int32;
try {
const char *usage =
"Perform one iteration of senones training by SGD.\n"
"The updates are done per-utternace and by processing multiple utterances in parallel.\n"
"\n"
"Usage: nnet-train-blstm-streams [options] <feature-rspecifier> <labels-rspecifier> <model-in> [<model-out>]\n"
"e.g.: \n"
" nnet-train-blstm-streams scp:feature.scp ark:labels.ark nnet.init nnet.iter1\n";
ParseOptions po(usage);
// training options
NnetTrainOptions trn_opts;
trn_opts.Register(&po);
bool binary = true,
crossvalidate = false;
po.Register("binary", &binary, "Write model in binary mode");
po.Register("cross-validate", &crossvalidate, "Perform cross-validation (no backpropagation)");
std::string feature_transform;
po.Register("feature-transform", &feature_transform, "Feature transform in Nnet format");
int32 length_tolerance = 5;
po.Register("length-tolerance", &length_tolerance, "Allowed length difference of features/targets (frames)");
std::string frame_weights;
po.Register("frame-weights", &frame_weights, "Per-frame weights to scale gradients (frame selection/weighting).");
std::string objective_function = "xent";
po.Register("objective-function", &objective_function, "Objective function : xent|mse");
int32 num_streams = 4;
po.Register("num_streams", &num_streams, "Number of sequences processed in parallel");
double frame_limit = 100000;
po.Register("frame-limit", &frame_limit, "Max number of frames to be processed");
int32 report_step = 100;
po.Register("report-step", &report_step, "Step (number of sequences) for status reporting");
std::string use_gpu = "yes";
// po.Register("use-gpu", &use_gpu, "yes|no|optional, only has effect if compiled with CUDA");
po.Read(argc, argv);
if (po.NumArgs() != 4-(crossvalidate?1:0)) {
po.PrintUsage();
exit(1);
}
std::string feature_rspecifier = po.GetArg(1),
targets_rspecifier = po.GetArg(2),
model_filename = po.GetArg(3);
std::string target_model_filename;
if (!crossvalidate) {
target_model_filename = po.GetArg(4);
}
using namespace kaldi;
using namespace kaldi::nnet1;
typedef kaldi::int32 int32;
Vector<BaseFloat> weights;
// Select the GPU
#if HAVE_CUDA == 1
CuDevice::Instantiate().SelectGpuId(use_gpu);
#endif
Nnet nnet_transf;
if ( feature_transform != "" ) {
nnet_transf.Read(feature_transform);
}
Nnet nnet;
nnet.Read(model_filename);
nnet.SetTrainOptions(trn_opts);
kaldi::int64 total_frames = 0;
// Initialize feature ans labels readers
SequentialBaseFloatMatrixReader feature_reader(feature_rspecifier);
RandomAccessPosteriorReader targets_reader(targets_rspecifier);
RandomAccessBaseFloatVectorReader weights_reader;
if (frame_weights != "") {
weights_reader.Open(frame_weights);
}
Xent xent;
Mse mse;
CuMatrix<BaseFloat> feats, feats_transf, nnet_out, obj_diff;
Timer time;
KALDI_LOG << (crossvalidate?"CROSS-VALIDATION":"TRAINING") << " STARTED";
// Feature matrix of every utterance
std::vector< Matrix<BaseFloat> > feats_utt(num_streams);
// Label vector of every utterance
std::vector< Posterior > labels_utt(num_streams);
std::vector< Vector<BaseFloat> > weights_utt(num_streams);
int32 feat_dim = nnet.InputDim();
int32 num_done = 0, num_no_tgt_mat = 0, num_other_error = 0;
while (1) {
std::vector<int32> frame_num_utt;
int32 sequence_index = 0, max_frame_num = 0;
for ( ; !feature_reader.Done(); feature_reader.Next()) {
std::string utt = feature_reader.Key();
// Check that we have targets
if (!targets_reader.HasKey(utt)) {
KALDI_WARN << utt << ", missing targets";
num_no_tgt_mat++;
continue;
}
// Get feature / target pair
Matrix<BaseFloat> mat = feature_reader.Value();
Posterior targets = targets_reader.Value(utt);
if (frame_weights != "") {
weights = weights_reader.Value(utt);
} else { // all per-frame weights are 1.0
weights.Resize(mat.NumRows());
weights.Set(1.0);
}
// correct small length mismatch ... or drop sentence
{
// add lengths to vector
std::vector<int32> lenght;
lenght.push_back(mat.NumRows());
lenght.push_back(targets.size());
lenght.push_back(weights.Dim());
// find min, max
int32 min = *std::min_element(lenght.begin(), lenght.end());
int32 max = *std::max_element(lenght.begin(), lenght.end());
// fix or drop ?
if (max - min < length_tolerance) {
if (mat.NumRows() != min) mat.Resize(min, mat.NumCols(), kCopyData);
if (targets.size() != min) targets.resize(min);
if (weights.Dim() != min) weights.Resize(min, kCopyData);
} else {
KALDI_WARN << utt << ", length mismatch of targets " << targets.size()
<< " and features " << mat.NumRows();
num_other_error++;
continue;
}
}
if (max_frame_num < mat.NumRows()) max_frame_num = mat.NumRows();
feats_utt[sequence_index] = mat;
labels_utt[sequence_index] = targets;
weights_utt[sequence_index] = weights;
frame_num_utt.push_back(mat.NumRows());
sequence_index++;
// If the total number of frames reaches frame_limit, then stop adding more sequences, regardless of whether
// the number of utterances reaches num_sequence or not.
if (frame_num_utt.size() == num_streams || frame_num_utt.size() * max_frame_num > frame_limit) {
feature_reader.Next(); break;
}
}
int32 cur_sequence_num = frame_num_utt.size();
// Create the final feature matrix. Every utterance is padded to the max length within this group of utterances
Matrix<BaseFloat> feat_mat_host(cur_sequence_num * max_frame_num, feat_dim, kSetZero);
Posterior target_host;
Vector<BaseFloat> weight_host;
target_host.resize(cur_sequence_num * max_frame_num);
weight_host.Resize(cur_sequence_num * max_frame_num, kSetZero);
for (int s = 0; s < cur_sequence_num; s++) {
Matrix<BaseFloat> mat_tmp = feats_utt[s];
for (int r = 0; r < frame_num_utt[s]; r++) {
feat_mat_host.Row(r*cur_sequence_num + s).CopyFromVec(mat_tmp.Row(r));
}
}
for (int s = 0; s < cur_sequence_num; s++) {
Posterior target_tmp = labels_utt[s];
for (int r = 0; r < frame_num_utt[s]; r++) {
target_host[r*cur_sequence_num+s] = target_tmp[r];
}
Vector<BaseFloat> weight_tmp = weights_utt[s];
for (int r = 0; r < frame_num_utt[s]; r++) {
weight_host(r*cur_sequence_num+s) = weight_tmp(r);
}
}
// transform feature
nnet_transf.Feedforward(CuMatrix<BaseFloat>(feat_mat_host), &feats_transf);
// Set the original lengths of utterances before padding
nnet.SetSeqLengths(frame_num_utt);
// Propagation and xent training
nnet.Propagate(feats_transf, &nnet_out);
if (objective_function == "xent") {
// gradients re-scaled by weights in Eval,
xent.Eval(weight_host, nnet_out, target_host, &obj_diff);
} else if (objective_function == "mse") {
// gradients re-scaled by weights in Eval,
mse.Eval(weight_host, nnet_out, target_host, &obj_diff);
} else {
KALDI_ERR << "Unknown objective function code : " << objective_function;
}
// Backward pass
if (!crossvalidate) {
nnet.Backpropagate(obj_diff, NULL);
}
// 1st minibatch : show what happens in network
if (kaldi::g_kaldi_verbose_level >= 2 && total_frames == 0) { // vlog-1
KALDI_VLOG(1) << "### After " << total_frames << " frames,";
KALDI_VLOG(1) << nnet.InfoPropagate();
if (!crossvalidate) {
KALDI_VLOG(1) << nnet.InfoBackPropagate();
KALDI_VLOG(1) << nnet.InfoGradient();
}
}
num_done += cur_sequence_num;
total_frames += feats_transf.NumRows();
if (feature_reader.Done()) break; // end loop of while(1)
}
// Check network parameters and gradients when training finishes
if (kaldi::g_kaldi_verbose_level >= 1) { // vlog-1
KALDI_VLOG(1) << "### After " << total_frames << " frames,";
KALDI_VLOG(1) << nnet.InfoPropagate();
if (!crossvalidate) {
KALDI_VLOG(1) << nnet.InfoBackPropagate();
KALDI_VLOG(1) << nnet.InfoGradient();
}
}
if (!crossvalidate) {
nnet.Write(target_model_filename, binary);
}
KALDI_LOG << "Done " << num_done << " files, " << num_no_tgt_mat
<< " with no tgt_mats, " << num_other_error
<< " with other errors. "
<< "[" << (crossvalidate?"CROSS-VALIDATION":"TRAINING")
<< ", " << time.Elapsed()/60 << " min, fps" << total_frames/time.Elapsed()
<< "]";
KALDI_LOG << xent.Report();
#if HAVE_CUDA == 1
CuDevice::Instantiate().PrintProfile();
#endif
return 0;
} catch(const std::exception &e) {
std::cerr << e.what();
return -1;
}
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment