Commit 71ef8633 authored by Dan Povey

trunk: checking in some partial work towards nnet1->nnet2 conversion (still compiles, but the program doesn't run).

git-svn-id: https://svn.code.sf.net/p/kaldi/code/trunk@4246 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
parent 3ef595c7
......@@ -41,7 +41,7 @@ if [ ! -f $dir/final.mdl ]; then
--minibatch-size "$minibatch_size" \
--parallel-opts "$parallel_opts" \
--num-jobs-nnet 4 \
--num-epochs-extra 10 --add-layers-period 1 \
--num-epochs 8 --num-epochs-extra 5 --add-layers-period 1 \
--num-hidden-layers 2 \
--mix-up 4000 \
--initial-learning-rate 0.02 --final-learning-rate 0.004 \
......
......@@ -14,8 +14,9 @@ use_graphs=false
# Begin configuration.
scale_opts="--transition-scale=1.0 --self-loop-scale=0.1"
acoustic_scale=0.1
beam=20.0
lattice_beam=10.0
beam=15.0
lattice_beam=8.0
max_active=750
transform_dir= # directory to find fMLLR transforms in.
top_n_words=100 # Number of common words that we compile into each graph (most frequent
                # in $lang/text).
......@@ -116,7 +117,8 @@ if [ $stage -le 0 ]; then
compile-train-graphs-fsts $scale_opts --read-disambig-syms=$lang/phones/disambig.int \
$dir/tree $dir/final.mdl $lang/L_disambig.fst ark:- ark:- \| \
gmm-latgen-faster --acoustic-scale=$acoustic_scale --beam=$beam \
--lattice-beam=$lattice_beam --word-symbol-table=$lang/words.txt \
--max-active=$max_active --lattice-beam=$lattice_beam \
--word-symbol-table=$lang/words.txt \
$dir/final.mdl ark:- "$feats" ark:- \| \
lattice-oracle ark:- "ark:utils/sym2int.pl --map-oov $oov -f 2- $lang/words.txt $sdata/JOB/text|" \
ark,t:- ark,t:$dir/edits.JOB.txt \| \
......@@ -157,9 +159,9 @@ if [ $stage -le 1 ]; then
<(awk '{$1="";print;}' <$dir/text) > $dir/all_info.txt
sort -nr -k2 $dir/all_info.txt > $dir/all_info.sorted.txt
if $cleanup; then
rm $dir/edits.*.txt $dir/aligned_ref.*.txt
fi
# TEMP, will uncomment this later.
#if $cleanup; then
# rm $dir/edits.*.txt $dir/aligned_ref.*.txt
#fi
fi
......@@ -290,9 +290,9 @@ class CuMatrixBase {
void InvertElements();
/// *this += alpha * A
void AddMat(Real alpha, const CuMatrixBase<Real> &A, MatrixTransposeType transA = kNoTrans);
/// B = alpha * row + beta * B
/// (for each column c of *this), c = alpha * col + beta * c
void AddVecToCols(Real alpha, const CuVectorBase<Real> &col, Real beta = 1.0);
/// B = alpha * row + beta * B
/// (for each row r of *this), r = alpha * row + beta * r
void AddVecToRows(Real alpha, const CuVectorBase<Real> &row, Real beta = 1.0);
/// C = alpha * A(^T)*B(^T) + beta * C
void AddMatMat(Real alpha, const CuMatrixBase<Real> &A, MatrixTransposeType transA,
......
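As a quick illustration of the two operations whose doc-comments were corrected above, here is a minimal standalone sketch (not part of the commit), assuming the CuMatrix/CuVector API shown in this header:

```cpp
#include "cudamatrix/cu-matrix.h"
#include "cudamatrix/cu-vector.h"

// Demonstrates the corrected semantics: AddVecToCols updates every column and
// AddVecToRows updates every row, each as alpha * vec + beta * (old value).
void AddVecExample() {
  kaldi::CuMatrix<kaldi::BaseFloat> M(4, 3);
  kaldi::CuVector<kaldi::BaseFloat> col(4), row(3);
  M.SetRandn();
  col.SetRandn();
  row.SetRandn();
  M.AddVecToCols(2.0, col, 1.0);  // each column c of M becomes 2.0 * col + c
  M.AddVecToRows(2.0, row, 1.0);  // each row r of M becomes 2.0 * row + r
}
```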
......@@ -171,7 +171,7 @@ class AffineTransform : public UpdatableComponent {
}
/// Accessors to the component parameters
const CuVector<BaseFloat>& GetBias() {
const CuVector<BaseFloat>& GetBias() const {
return bias_;
}
......@@ -180,7 +180,7 @@ class AffineTransform : public UpdatableComponent {
bias_.CopyFromVec(bias);
}
const CuMatrix<BaseFloat>& GetLinearity() {
const CuMatrix<BaseFloat>& GetLinearity() const {
return linearity_;
}
......@@ -190,11 +190,11 @@ class AffineTransform : public UpdatableComponent {
linearity_.CopyFromMat(linearity);
}
const CuVector<BaseFloat>& GetBiasCorr() {
const CuVector<BaseFloat>& GetBiasCorr() const {
return bias_corr_;
}
const CuMatrix<BaseFloat>& GetLinearityCorr() {
const CuMatrix<BaseFloat>& GetLinearityCorr() const {
return linearity_corr_;
}
......
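The const qualifiers added to these accessors are what allow the nnet1->nnet2 converter later in this commit to read the parameters through a const reference; a small hypothetical illustration (not part of the commit):

```cpp
#include "nnet/nnet-affine-transform.h"

// Without the const accessors above, this would not compile: "affine" is a
// const reference, so only const member functions can be called on it.
kaldi::int32 BiasDim(const kaldi::nnet1::AffineTransform &affine) {
  return affine.GetBias().Dim();
}
```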
......@@ -506,7 +506,7 @@ void UnitTestAffinePreconInputComponent() {
void UnitTestBlockAffineComponent() {
BaseFloat learning_rate = 0.01,
param_stddev = 0.1, bias_stddev = 1.0;
param_stddev = 0.1, bias_stddev = 0.1;
int32 num_blocks = 1 + rand() % 3,
input_dim = num_blocks * (2 + rand() % 4),
output_dim = num_blocks * (2 + rand() % 4);
......@@ -655,6 +655,28 @@ void UnitTestFixedAffineComponent() {
}
}
void UnitTestFixedScaleComponent() {
int32 m = 1 + rand() % 20;
{
CuVector<BaseFloat> vec(m);
vec.SetRandn();
FixedScaleComponent component;
component.Init(vec);
UnitTestGenericComponentInternal(component);
}
}
void UnitTestFixedBiasComponent() {
int32 m = 1 + rand() % 20;
{
CuVector<BaseFloat> vec(m);
vec.SetRandn();
FixedBiasComponent component;
component.Init(vec);
UnitTestGenericComponentInternal(component);
}
}
void UnitTestParsing() {
......@@ -825,6 +847,8 @@ int main() {
UnitTestDctComponent();
UnitTestFixedLinearComponent();
UnitTestFixedAffineComponent();
UnitTestFixedScaleComponent();
UnitTestFixedBiasComponent();
UnitTestAffineComponentPreconditioned();
UnitTestAffineComponentPreconditionedOnline();
UnitTestAffineComponentModified();
......
......@@ -98,6 +98,10 @@ Component* Component::NewComponentOfType(const std::string &component_type) {
ans = new FixedLinearComponent();
} else if (component_type == "FixedAffineComponent") {
ans = new FixedAffineComponent();
} else if (component_type == "FixedScaleComponent") {
ans = new FixedScaleComponent();
} else if (component_type == "FixedBiasComponent") {
ans = new FixedBiasComponent();
} else if (component_type == "SpliceComponent") {
ans = new SpliceComponent();
} else if (component_type == "SpliceMaxComponent") {
......@@ -1071,6 +1075,18 @@ AffineComponent::AffineComponent(const AffineComponent &component):
bias_params_(component.bias_params_),
is_gradient_(component.is_gradient_) { }
AffineComponent::AffineComponent(const CuMatrix<BaseFloat> &linear_params,
const CuVector<BaseFloat> &bias_params,
BaseFloat learning_rate):
UpdatableComponent(learning_rate),
linear_params_(linear_params),
bias_params_(bias_params) {
KALDI_ASSERT(linear_params.NumRows() == bias_params.Dim() &&
             bias_params.Dim() != 0);
}
void AffineComponent::SetZero(bool treat_as_gradient) {
if (treat_as_gradient) {
SetLearningRate(1.0);
......@@ -4159,6 +4175,142 @@ void FixedAffineComponent::Read(std::istream &is, bool binary) {
}
void FixedScaleComponent::Init(const CuVectorBase<BaseFloat> &scales) {
KALDI_ASSERT(scales.Dim() != 0);
scales_ = scales;
}
void FixedScaleComponent::InitFromString(std::string args) {
std::string orig_args = args;
std::string filename;
bool ok = ParseFromString("scales", &args, &filename);
if (!ok || !args.empty())
KALDI_ERR << "Invalid initializer for layer of type "
<< Type() << ": \"" << orig_args << "\"";
CuVector<BaseFloat> vec;
ReadKaldiObject(filename, &vec);
Init(vec);
}
std::string FixedScaleComponent::Info() const {
std::stringstream stream;
BaseFloat scales_size = static_cast<BaseFloat>(scales_.Dim()),
    scales_mean = scales_.Sum() / scales_size,
    // stddev = sqrt(E[x^2] - mean^2); the subtraction belongs inside the sqrt.
    scales_stddev = std::sqrt(VecVec(scales_, scales_) / scales_size
                              - (scales_mean * scales_mean));
stream << Component::Info() << ", scales-mean=" << scales_mean
<< ", scales-stddev=" << scales_stddev;
return stream.str();
}
void FixedScaleComponent::Propagate(const CuMatrixBase<BaseFloat> &in,
int32 num_chunks,
CuMatrix<BaseFloat> *out) const {
*out = in;
out->MulColsVec(scales_);
}
void FixedScaleComponent::Backprop(const CuMatrixBase<BaseFloat> &, // in_value
const CuMatrixBase<BaseFloat> &, // out_value
const CuMatrixBase<BaseFloat> &out_deriv,
int32, // num_chunks
Component *, // to_update
CuMatrix<BaseFloat> *in_deriv) const {
*in_deriv = out_deriv;
in_deriv->MulColsVec(scales_);
}
Component* FixedScaleComponent::Copy() const {
FixedScaleComponent *ans = new FixedScaleComponent();
ans->scales_ = scales_;
return ans;
}
void FixedScaleComponent::Write(std::ostream &os, bool binary) const {
WriteToken(os, binary, "<FixedScaleComponent>");
WriteToken(os, binary, "<Scales>");
scales_.Write(os, binary);
WriteToken(os, binary, "</FixedScaleComponent>");
}
void FixedScaleComponent::Read(std::istream &is, bool binary) {
ExpectOneOrTwoTokens(is, binary, "<FixedScaleComponent>", "<Scales>");
scales_.Read(is, binary);
ExpectToken(is, binary, "</FixedScaleComponent>");
}
void FixedBiasComponent::Init(const CuVectorBase<BaseFloat> &bias) {
KALDI_ASSERT(bias.Dim() != 0);
bias_ = bias;
}
void FixedBiasComponent::InitFromString(std::string args) {
std::string orig_args = args;
std::string filename;
bool ok = ParseFromString("bias", &args, &filename);
if (!ok || !args.empty())
KALDI_ERR << "Invalid initializer for layer of type "
<< Type() << ": \"" << orig_args << "\"";
CuVector<BaseFloat> vec;
ReadKaldiObject(filename, &vec);
Init(vec);
}
std::string FixedBiasComponent::Info() const {
std::stringstream stream;
BaseFloat bias_size = static_cast<BaseFloat>(bias_.Dim()),
    bias_mean = bias_.Sum() / bias_size,
    // As above, compute sqrt(E[x^2] - mean^2), keeping the subtraction inside.
    bias_stddev = std::sqrt(VecVec(bias_, bias_) / bias_size
                            - (bias_mean * bias_mean));
stream << Component::Info() << ", bias-mean=" << bias_mean
<< ", bias-stddev=" << bias_stddev;
return stream.str();
}
void FixedBiasComponent::Propagate(const CuMatrixBase<BaseFloat> &in,
int32 num_chunks,
CuMatrix<BaseFloat> *out) const {
*out = in;
out->AddVecToRows(1.0, bias_, 1.0);
}
void FixedBiasComponent::Backprop(const CuMatrixBase<BaseFloat> &, // in_value
const CuMatrixBase<BaseFloat> &, // out_value
const CuMatrixBase<BaseFloat> &out_deriv,
int32, // num_chunks
Component *, // to_update
CuMatrix<BaseFloat> *in_deriv) const {
*in_deriv = out_deriv;
}
Component* FixedBiasComponent::Copy() const {
FixedBiasComponent *ans = new FixedBiasComponent();
ans->bias_ = bias_;
return ans;
}
void FixedBiasComponent::Write(std::ostream &os, bool binary) const {
WriteToken(os, binary, "<FixedBiasComponent>");
WriteToken(os, binary, "<Bias>");
bias_.Write(os, binary);
WriteToken(os, binary, "</FixedBiasComponent>");
}
void FixedBiasComponent::Read(std::istream &is, bool binary) {
ExpectOneOrTwoTokens(is, binary, "<FixedBiasComponent>", "<Bias>");
bias_.Read(is, binary);
ExpectToken(is, binary, "</FixedBiasComponent>");
}
std::string DropoutComponent::Info() const {
......
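To summarize the forward/backward semantics implemented above: FixedScaleComponent computes y = x * s elementwise, so by the chain rule its backprop multiplies the output derivative by the same scales, while FixedBiasComponent only shifts the forward pass and passes derivatives through unchanged. A minimal test-style sketch (hypothetical, using this commit's Propagate signature):

```cpp
#include "nnet2/nnet-component.h"

// Propagates random data through a FixedScaleComponent; each column of the
// output equals the corresponding input column times its fixed scale.
void FixedScaleSanityCheck() {
  using namespace kaldi;
  CuVector<BaseFloat> scales(10);
  scales.SetRandn();
  nnet2::FixedScaleComponent component;
  component.Init(scales);
  CuMatrix<BaseFloat> in(5, 10), out;
  in.SetRandn();
  component.Propagate(in, 1, &out);  // out(r, c) == in(r, c) * scales(c)
}
```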
......@@ -638,6 +638,11 @@ class AffineComponent: public UpdatableComponent {
friend class SoftmaxComponent; // Friend declaration relates to mixing up.
public:
explicit AffineComponent(const AffineComponent &other);
// The next constructor is used in converting from nnet1.
AffineComponent(const CuMatrix<BaseFloat> &linear_params,
const CuVector<BaseFloat> &bias_params,
BaseFloat learning_rate);
virtual int32 InputDim() const { return linear_params_.NumCols(); }
virtual int32 OutputDim() const { return linear_params_.NumRows(); }
void Init(BaseFloat learning_rate,
......@@ -1607,6 +1612,81 @@ class FixedAffineComponent: public Component {
};
/// FixedScaleComponent applies a fixed per-element scale; it's similar
/// to the Rescale component in the nnet1 setup (and only needed for nnet1
/// model conversion).
class FixedScaleComponent: public Component {
public:
FixedScaleComponent() { }
virtual std::string Type() const { return "FixedScaleComponent"; }
virtual std::string Info() const;
void Init(const CuVectorBase<BaseFloat> &scales);
// InitFromString takes only the option scales=<string>,
// where the string is the filename of a Kaldi-format vector to read.
virtual void InitFromString(std::string args);
virtual int32 InputDim() const { return scales_.Dim(); }
virtual int32 OutputDim() const { return scales_.Dim(); }
virtual void Propagate(const CuMatrixBase<BaseFloat> &in,
int32 num_chunks,
CuMatrix<BaseFloat> *out) const;
virtual void Backprop(const CuMatrixBase<BaseFloat> &in_value,
const CuMatrixBase<BaseFloat> &out_value,
const CuMatrixBase<BaseFloat> &out_deriv,
int32 num_chunks,
Component *to_update, // may be identical to "this".
CuMatrix<BaseFloat> *in_deriv) const;
virtual bool BackpropNeedsInput() const { return false; }
virtual bool BackpropNeedsOutput() const { return false; }
virtual Component* Copy() const;
virtual void Read(std::istream &is, bool binary);
virtual void Write(std::ostream &os, bool binary) const;
protected:
CuVector<BaseFloat> scales_;
KALDI_DISALLOW_COPY_AND_ASSIGN(FixedScaleComponent);
};
/// FixedBiasComponent applies a fixed per-element bias; it's similar
/// to the AddShift component in the nnet1 setup (and only needed for nnet1
/// model conversion).
class FixedBiasComponent: public Component {
public:
FixedBiasComponent() { }
virtual std::string Type() const { return "FixedBiasComponent"; }
virtual std::string Info() const;
void Init(const CuVectorBase<BaseFloat> &bias);
// InitFromString takes only the option bias=<string>,
// where the string is the filename of a Kaldi-format vector to read.
virtual void InitFromString(std::string args);
virtual int32 InputDim() const { return bias_.Dim(); }
virtual int32 OutputDim() const { return bias_.Dim(); }
virtual void Propagate(const CuMatrixBase<BaseFloat> &in,
int32 num_chunks,
CuMatrix<BaseFloat> *out) const;
virtual void Backprop(const CuMatrixBase<BaseFloat> &in_value,
const CuMatrixBase<BaseFloat> &out_value,
const CuMatrixBase<BaseFloat> &out_deriv,
int32 num_chunks,
Component *to_update, // may be identical to "this".
CuMatrix<BaseFloat> *in_deriv) const;
virtual bool BackpropNeedsInput() const { return false; }
virtual bool BackpropNeedsOutput() const { return false; }
virtual Component* Copy() const;
virtual void Read(std::istream &is, bool binary);
virtual void Write(std::ostream &os, bool binary) const;
protected:
CuVector<BaseFloat> bias_;
KALDI_DISALLOW_COPY_AND_ASSIGN(FixedBiasComponent);
};
/// This Component, if present, randomly zeroes half of
/// the inputs and multiplies the other half by two.
/// Typically you would use this in training but not in
......
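As the comments in both class declarations note, InitFromString accepts a single filename option; a hedged usage sketch (the exp/*.vec filenames are hypothetical and would hold Kaldi-format vectors, e.g. written with WriteKaldiObject):

```cpp
#include "nnet2/nnet-component.h"

// Initializes the two fixed components the way config lines processed by
// nnet-init would, reading the vectors from the named files.
void InitFixedComponents() {
  kaldi::nnet2::FixedScaleComponent scale_component;
  scale_component.InitFromString("scales=exp/scales.vec");  // hypothetical file
  kaldi::nnet2::FixedBiasComponent bias_component;
  bias_component.InitFromString("bias=exp/bias.vec");  // hypothetical file
}
```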
......@@ -26,15 +26,18 @@ BINFILES = nnet-randomize-frames nnet-am-info nnet-init \
nnet-modify-learning-rates nnet-normalize-stddev nnet-perturb-egs \
nnet-perturb-egs-fmllr nnet-get-weighted-egs nnet-adjust-priors \
cuda-compiled nnet-replace-last-layers nnet-am-switch-preconditioning \
nnet-train-simple-perturbed nnet-train-parallel-perturbed
nnet-train-simple-perturbed nnet-train-parallel-perturbed \
nnet1-to-raw-nnet
OBJFILES =
# Add this dependency to force cuda-compiled.o to be rebuilt when we reconfigure.
cuda-compiled.o: ../kaldi.mk
TESTFILES =
ADDLIBS = ../nnet2/kaldi-nnet2.a ../gmm/kaldi-gmm.a \
ADDLIBS = ../nnet2/kaldi-nnet2.a ../nnet/kaldi-nnet.a ../gmm/kaldi-gmm.a \
../decoder/kaldi-decoder.a ../lat/kaldi-lat.a ../hmm/kaldi-hmm.a \
../transform/kaldi-transform.a ../tree/kaldi-tree.a ../thread/kaldi-thread.a \
../cudamatrix/kaldi-cudamatrix.a ../matrix/kaldi-matrix.a \
......
......@@ -35,7 +35,7 @@ int main(int argc, char *argv[]) {
"Initialize the neural network from a config file with a line for each\n"
"component. Note, this only outputs the neural net itself, not the associated\n"
"information such as the transition-model; you'll probably want to pipe\n"
"the output into something like am-nnet-init.\n"
"the output into something like nnet-am-init.\n"
"\n"
"Usage: nnet-init [options] <config-in> <raw-nnet-out>\n"
"e.g.:\n"
......
// nnet2bin/nnet1-to-raw-nnet.cc
// Copyright 2013 Johns Hopkins University (author: Daniel Povey)
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS*, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABILITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#include "base/kaldi-common.h"
#include "util/common-utils.h"
#include "hmm/transition-model.h"
#include "nnet/nnet-nnet.h"
#include "nnet/nnet-affine-transform.h"
// may need more includes here.
#include "nnet2/nnet-nnet.h"
namespace kaldi {
nnet2::Component *ConvertAffineTransformComponent(
const nnet1::Component &nnet1_component) {
const nnet1::AffineTransform *affine =
dynamic_cast<const nnet1::AffineTransform*>(&nnet1_component);
KALDI_ASSERT(affine != NULL);
// The default learning rate is 1.0e-05; you can use the --learning-rate or
// --learning-rates option of nnet-am-copy to change it if you need to.
BaseFloat learning_rate = 1.0e-05;
return new nnet2::AffineComponent(affine->GetLinearity(),
                                  affine->GetBias(),  // the bias itself, not the bias-correction term
                                  learning_rate);
}
nnet2::Component *ConvertComponent(const nnet1::Component &nnet1_component) {
nnet1::Component::ComponentType type_in = nnet1_component.GetType();
switch (type_in) {
case nnet1::Component::kAffineTransform:
return ConvertAffineTransformComponent(nnet1_component);
/* case nnet1::Component::kSoftmax:
return ConvertSoftmaxComponent(nnet1_component);
case nnet1::Component::kSigmoid:
return ConvertSigmoidComponent(nnet1_component);
case nnet1::Component::kSplice:
return ConvertSpliceComponent(nnet1_component); // note, this will for now only handle the
// special case where all splice indexes in nnet1_component are contiguous, e.g.
// -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5 .
case nnet1::Component::kAddShift:
return ConvertAddShiftComponent(nnet1_component); // convert to FixedBiasComponent
case nnet1::Component::kRescale:
return ConvertRescaleComponent(nnet1_component); // convert to FixedScaleComponent
*/
default: KALDI_ERR << "Un-handled nnet1 component type "
<< nnet1::Component::TypeToMarker(type_in);
return NULL;
}
}
nnet2::Nnet *ConvertNnet1ToNnet2(const nnet1::Nnet &nnet1) {
// TODO: get a vector of nnet2::Component pointers and initialize the nnet2::Nnet with it.
return NULL;
}
}
int main(int argc, char *argv[]) {
try {
using namespace kaldi;
typedef kaldi::int32 int32;
const char *usage =
"Convert nnet1 neural net to nnet2 'raw' neural net\n"
""
"\n"
"Usage: nnet1-to-raw-nnet [options] <nnet1-in> <nnet2-out>\n"
"e.g.:\n"
" nnet1-to-raw-nnet srcdir/final.nnet - | nnet-am-init dest/tree dest/topo - dest/0.mdl\n";
KALDI_ERR << "This program is not finished.";
bool binary_write = true;
int32 srand_seed = 0;
ParseOptions po(usage);
po.Register("binary", &binary_write, "Write output in binary mode");
po.Read(argc, argv);
srand(srand_seed);
if (po.NumArgs() != 2) {
po.PrintUsage();
exit(1);
}
std::string nnet1_rxfilename = po.GetArg(1),
raw_nnet2_wxfilename = po.GetArg(2);
nnet1::Nnet nnet1;
ReadKaldiObject(nnet1_rxfilename, &nnet1);
nnet2::Nnet *nnet2 = ConvertNnet1ToNnet2(nnet1);
WriteKaldiObject(*nnet2, raw_nnet2_wxfilename, binary_write);
KALDI_LOG << "Converted nnet1 neural net to raw nnet2 and wrote it to "
<< PrintableWxfilename(raw_nnet2_wxfilename);
return 0;
} catch(const std::exception &e) {
std::cerr << e.what() << '\n';
return -1;
}
}
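ConvertNnet1ToNnet2 is still a stub (and the program deliberately errors out before reaching it). Based on its comment, a completed version might look roughly like the sketch below; this assumes nnet1::Nnet exposes NumComponents() and GetComponent(i), and that nnet2::Nnet::Init() takes ownership of a vector of component pointers, so check those signatures against the headers before relying on it:

```cpp
// Hypothetical completion of the stub above; not part of this commit.
nnet2::Nnet *ConvertNnet1ToNnet2Sketch(const nnet1::Nnet &nnet1) {
  std::vector<nnet2::Component*> components;
  for (int32 i = 0; i < nnet1.NumComponents(); i++)
    components.push_back(ConvertComponent(nnet1.GetComponent(i)));
  nnet2::Nnet *nnet2 = new nnet2::Nnet();
  nnet2->Init(&components);  // assumed to take ownership of the pointers
  return nnet2;
}
```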
......@@ -34,7 +34,7 @@ int main(int argc, char *argv[]) {
"Concatenate two 'raw' neural nets, e.g. as output by nnet-init or\n"
"nnet-to-raw-nnet\n"
"\n"
"Usage: raw-nnet-concat [options] <raw-nnet1-in> <raw-nnet2-in> <raw-nnet-out>\n"
"Usage: raw-nnet-concat [options] <raw-nnet-in1> <raw-nnet-in2> <raw-nnet-out>\n"
"e.g.:\n"
" raw-nnet-concat nnet1 nnet2 nnet_concat\n";
......