Commit 9d4b994f authored by naxingyu

add more doc

parent 885586f9
...@@ -7,3 +7,6 @@ exp/tri5a/decode/cer_13:%WER 49.67 [ 27891 / 56154, 2877 ins, 4538 del, 20476 sub ]
exp/tri5a_mce/decode/cer_11:%WER 44.74 [ 25125 / 56154, 2112 ins, 4108 del, 18905 sub ]
exp/tri5a_mmi_b0.1/decode/cer_11:%WER 44.24 [ 24840 / 56154, 2060 ins, 4118 del, 18662 sub ]
exp/tri5a_mpe/decode/cer_12:%WER 44.96 [ 25247 / 56154, 2233 ins, 4174 del, 18840 sub ]
# ConvNet with 2 convolutional layers and 2 ReLU layers
exp/nnet2_convnet/decode/cer_10:%WER 40.73 [ 22873 / 56154, 2609 ins, 3712 del, 16552 sub ]
#!/bin/bash
# 2015 Xingyu Na
# This script runs on the full training set, using ConvNet setup on top of
# fbank features, on GPU. The ConvNet has four hidden layers, two convolutional
# layers and two affine transform layers with ReLU nonlinearity.
# Convolutional layer [1]:
#   convolution1d, input feature dim is 36, filter dim is 7, output dim is
#   30, 128 filters are used
#   maxpooling, 3-to-1 maxpooling, input dim is 30, output dim is 10
# Convolutional layer [2]:
#   convolution1d, input feature dim is 10, filter dim is 4, output dim is
#   7, 256 filters are used
# Affine transform layers [3-4]:
#   affine transform with ReLU nonlinearity.
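The dimension arithmetic in the header above can be checked mechanically. The following is a minimal standalone C++ sketch (illustrative only; not part of the recipe or of Kaldi) that reproduces the per-filter output dims 30, 10 and 7 claimed above:

// check_convnet_dims.cc -- illustrative sketch; reproduces the dimension
// arithmetic of the header comment (36 -> 30 -> 10 -> 7, per filter).
#include <cassert>
#include <cstdio>

// Patches produced by a valid 1-d convolution over one row.
static int NumPatches(int input_dim, int patch_dim, int patch_step) {
  assert((input_dim - patch_dim) % patch_step == 0);
  return 1 + (input_dim - patch_dim) / patch_step;
}

int main() {
  int conv1 = NumPatches(36, 7, 1);     // layer [1]: (36 - 7) / 1 + 1 = 30
  assert(conv1 % 3 == 0);               // 3-to-1 maxpooling must divide evenly
  int pool1 = conv1 / 3;                // 30 / 3 = 10
  int conv2 = NumPatches(pool1, 4, 1);  // layer [2]: (10 - 4) / 1 + 1 = 7
  printf("conv1=%d pool1=%d conv2=%d\n", conv1, pool1, conv2);
  return 0;
}

Multiplying by the filter counts gives the actual component output dims (30 x 128 after the first convolution, 7 x 256 after the second).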
temp_dir=
dir=exp/nnet2_convnet
...@@ -16,7 +26,7 @@ train=data-fb/train
. utils/parse_options.sh
parallel_opts="--gpu 1" # This is suitable for the CLSP network, you'll
                        # likely have to change it.
# Make the FBANK features
if [ $stage -le -5 ]; then
...@@ -40,7 +50,7 @@ fi
    --mix-up 20000 --samples-per-iter 300000 \
    --num-epochs 15 --delta-order 2 \
    --initial-effective-lrate 0.0005 --final-effective-lrate 0.000025 \
    --num-jobs-initial 3 --num-jobs-final 8 --splice-width 5 \
    --hidden-dim 2000 --num-filters1 128 --patch-dim1 7 --pool-size 3 \
    --num-filters2 256 --patch-dim2 4 \
    $train data/lang exp/tri5a_ali $dir || exit 1;
......
...@@ -4,10 +4,14 @@
# 2013 Xiaohui Zhang
# 2013 Guoguo Chen
# 2014 Vimal Manohar
# 2015 Xingyu Na
# Apache 2.0.
# train_convnet_accel2.sh is modified from train_pnorm_accel2.sh. It prototypes
# the training of a ConvNet. The ConvNet is composed of 4 layers. The first layer
# is a Convolutional1d component plus a Maxpooling component. The second layer
# is a single Convolutional1d component. The third and fourth layers are affine
# components with ReLU nonlinearities. Because the ReLU output is non-squashing,
# a normalize component is applied after each of the four layers.
# train_pnorm_accel2.sh is a modified form of train_pnorm_simple2.sh (the "2"
# suffix is because they both use the "new" egs format, created by
...@@ -61,8 +65,7 @@ shuffle_buffer_size=5000 # This "buffer_size" variable controls randomization of
# affect each other's gradients.
add_layers_period=2 # by default, add new layers every 2 iterations.
stage=-3
splice_width=4 # meaning +- 4 frames on each side for second LDA
left_context= # if set, overrides splice-width
...@@ -129,7 +132,6 @@ if [ $# != 4 ]; then
echo " --initial-effective-lrate <lrate|0.02> # effective learning rate at start of training,"
echo " # actual learning-rate is this times num-jobs."
echo " --final-effective-lrate <lrate|0.004> # effective learning rate at end of training."
echo " --add-layers-period <#iters|2> # Number of iterations between adding hidden layers"
echo " --mix-up <#pseudo-gaussians|0> # Can be used to have multiple targets in final output layer,"
echo " # per context-dependent state. Try a number several times #states."
...@@ -148,7 +150,6 @@ if [ $# != 4 ]; then
echo " # process."
echo " --splice-width <width|4> # Number of frames on each side to append for feature input"
echo " # (note: we splice processed, typically 40-dimensional frames)"
echo " --realign-epochs <list-of-epochs|\"\"> # A list of space-separated epoch indices the beginning of which"
echo " # realignment is to be done"
echo " --align-cmd (utils/run.pl|utils/queue.pl <queue opts>) # passed to align.sh"
...@@ -156,6 +157,15 @@ if [ $# != 4 ]; then
echo " --num-jobs-align <#njobs|30> # Number of jobs to perform realignment"
echo " --stage <stage|-4> # Used to run a partially-completed training process from somewhere in"
echo " # the middle."
echo "ConvNet configurations"
echo " --num-filters1 <num-filters1|128> # number of filters in the first convolutional layer."
echo " --patch-step1 <patch-step1|1> # patch step of the first convolutional layer."
echo " --patch-dim1 <patch-dim1|7> # dim of convolutional kernel in the first layer."
echo " # (note: (feat-dim - patch-dim1) % patch-step1 should be 0.)"
echo " --pool-size <pool-size|3> # size of pooling after the first convolutional layer."
echo " # (note: (feat-dim - patch-dim1 + 1) % pool-size should be 0.)"
echo " --num-filters2 <num-filters2|256> # number of filters in the second convolutional layer."
echo " --patch-dim2 <patch-dim2|4> # dim of convolutional kernel in the second layer."
exit 1;
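To make the two divisibility notes above concrete, here is a hedged C++ sketch (the helper name and layout are hypothetical, not part of this script) that encodes both checks using this recipe's defaults:

// validate_convnet_opts.cc -- illustrative check of the two notes above.
#include <cassert>

// With patch-step1 = 1 the second note, (feat-dim - patch-dim1 + 1) %
// pool-size == 0, is the same as num-patches % pool-size == 0.
static void ValidateConvnetOpts(int feat_dim, int patch_dim1,
                                int patch_step1, int pool_size) {
  assert((feat_dim - patch_dim1) % patch_step1 == 0);
  int num_patches = (feat_dim - patch_dim1) / patch_step1 + 1;
  assert(num_patches % pool_size == 0);
}

int main() {
  ValidateConvnetOpts(36, 7, 1, 3);  // this recipe's defaults pass
  return 0;
}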
...@@ -266,7 +276,7 @@ if [ $stage -le -2 ]; then
stddev=`perl -e "print 1.0/sqrt($hidden_dim);"`
cat >$dir/nnet.config <<EOF
SpliceComponent input-dim=$delta_feat_dim left-context=$left_context right-context=$right_context
Convolutional1dComponent input-dim=$tot_input_dim output-dim=$conv_out_dim1 learning-rate=$initial_lrate param-stddev=$stddev bias-stddev=$bias_stddev patch-dim=$patch_dim1 patch-step=$patch_step1 patch-stride=$feat_dim
MaxpoolingComponent input-dim=$conv_out_dim1 output-dim=$pool_out_dim pool-size=$pool_size pool-stride=$num_filters1
NormalizeComponent dim=$pool_out_dim
AffineComponentPreconditionedOnline input-dim=$pool_out_dim output-dim=$num_leaves $online_preconditioning_opts learning-rate=$initial_lrate param-stddev=0 bias-stddev=0
...@@ -274,7 +284,7 @@ SoftmaxComponent dim=$num_leaves
EOF
cat >$dir/replace.1.config <<EOF
Convolutional1dComponent input-dim=$pool_out_dim output-dim=$conv_out_dim2 learning-rate=$initial_lrate param-stddev=$stddev bias-stddev=$bias_stddev patch-dim=$patch_dim2 patch-step=$patch_step2 patch-stride=$patch_stride2
NormalizeComponent dim=$conv_out_dim2
AffineComponentPreconditionedOnline input-dim=$conv_out_dim2 output-dim=$num_leaves $online_preconditioning_opts learning-rate=$initial_lrate param-stddev=0 bias-stddev=0
SoftmaxComponent dim=$num_leaves
...@@ -282,7 +292,8 @@ EOF
cat >$dir/replace.2.config <<EOF
AffineComponentPreconditionedOnline input-dim=$conv_out_dim2 output-dim=$hidden_dim $online_preconditioning_opts learning-rate=$initial_lrate param-stddev=$stddev bias-stddev=$bias_stddev
RectifiedLinearComponent dim=$hidden_dim
NormalizeComponent dim=$hidden_dim
AffineComponentPreconditionedOnline input-dim=$hidden_dim output-dim=$num_leaves $online_preconditioning_opts learning-rate=$initial_lrate param-stddev=0 bias-stddev=0
SoftmaxComponent dim=$num_leaves
EOF
...@@ -291,7 +302,8 @@ EOF
# single hidden layer; we need this to add new layers.
cat >$dir/replace.3.config <<EOF
AffineComponentPreconditionedOnline input-dim=$hidden_dim output-dim=$hidden_dim $online_preconditioning_opts learning-rate=$initial_lrate param-stddev=$stddev bias-stddev=$bias_stddev
RectifiedLinearComponent dim=$hidden_dim
NormalizeComponent dim=$hidden_dim
AffineComponentPreconditionedOnline input-dim=$hidden_dim output-dim=$num_leaves $online_preconditioning_opts learning-rate=$initial_lrate param-stddev=0 bias-stddev=0
SoftmaxComponent dim=$num_leaves
EOF
......
...@@ -362,7 +362,7 @@ void UnitTestAffineComponent() {
  }
}
void UnitTestConvolutional1dComponent() {
  BaseFloat learning_rate = 0.01,
      param_stddev = 0.1, bias_stddev = 1.0;
  int32 patch_stride = 10, patch_step = 1, patch_dim = 4;
...@@ -372,7 +372,7 @@ void UnitTestConvolutionComponent() {
  int32 filter_dim = patch_dim * num_splice;
  int32 output_dim = num_patches * num_filters;
  {
    Convolutional1dComponent component;
    if (Rand() % 2 == 0) {
      component.Init(learning_rate, input_dim, output_dim,
                     patch_dim, patch_step, patch_stride,
...@@ -394,7 +394,7 @@ void UnitTestConvolutionComponent() {
  }
  {
    const char *str = "learning-rate=0.01 input-dim=100 output-dim=70 param-stddev=0.1 patch-dim=4 patch-step=1 patch-stride=10";
    Convolutional1dComponent component;
    component.InitFromString(str);
    UnitTestGenericComponentInternal(component);
  }
...@@ -890,7 +890,7 @@ int main() {
    UnitTestFixedBiasComponent();
    UnitTestAffineComponentPreconditioned();
    UnitTestAffineComponentPreconditionedOnline();
    UnitTestConvolutional1dComponent();
    UnitTestDropoutComponent();
    UnitTestAdditiveNoiseComponent();
    UnitTestParsing();
......
...@@ -102,8 +102,8 @@ Component* Component::NewComponentOfType(const std::string &component_type) {
    ans = new DropoutComponent();
  } else if (component_type == "AdditiveNoiseComponent") {
    ans = new AdditiveNoiseComponent();
  } else if (component_type == "Convolutional1dComponent") {
    ans = new Convolutional1dComponent();
  } else if (component_type == "MaxpoolingComponent") {
    ans = new MaxpoolingComponent();
  }
...@@ -3676,19 +3676,19 @@ void AdditiveNoiseComponent::Propagate(const ChunkInfo &in_info,
  out->AddMat(stddev_, rand);
}
Convolutional1dComponent::Convolutional1dComponent():
    UpdatableComponent(),
    patch_dim_(0), patch_step_(0), patch_stride_(0), is_gradient_(false) {}
Convolutional1dComponent::Convolutional1dComponent(const Convolutional1dComponent &component):
    UpdatableComponent(component),
    filter_params_(component.filter_params_),
    bias_params_(component.bias_params_),
    is_gradient_(component.is_gradient_) {}
Convolutional1dComponent::Convolutional1dComponent(const CuMatrixBase<BaseFloat> &filter_params,
                                                   const CuVectorBase<BaseFloat> &bias_params,
                                                   BaseFloat learning_rate):
    UpdatableComponent(learning_rate),
    filter_params_(filter_params),
    bias_params_(bias_params) {
...@@ -3698,24 +3698,24 @@ ConvolutionComponent::ConvolutionComponent(const CuMatrixBase<BaseFloat> &filter
}
// acquire input dim
int32 Convolutional1dComponent::InputDim() const {
  int32 filter_dim = filter_params_.NumCols();
  int32 num_splice = filter_dim / patch_dim_;
  return patch_stride_ * num_splice;
}
// acquire output dim
int32 Convolutional1dComponent::OutputDim() const {
  int32 num_filters = filter_params_.NumRows();
  int32 num_patches = 1 + (patch_stride_ - patch_dim_) / patch_step_;
  return num_patches * num_filters;
}
// initialize the component using hyperparameters
void Convolutional1dComponent::Init(BaseFloat learning_rate,
                                    int32 input_dim, int32 output_dim,
                                    int32 patch_dim, int32 patch_step, int32 patch_stride,
                                    BaseFloat param_stddev, BaseFloat bias_stddev) {
  UpdatableComponent::Init(learning_rate);
  patch_dim_ = patch_dim;
  patch_step_ = patch_step;
...@@ -3738,8 +3738,8 @@ void ConvolutionComponent::Init(BaseFloat learning_rate,
}
// initialize the component using predefined matrix file
void Convolutional1dComponent::Init(BaseFloat learning_rate,
                                    std::string matrix_filename) {
  UpdatableComponent::Init(learning_rate);
  CuMatrix<BaseFloat> mat;
  ReadKaldiObject(matrix_filename, &mat);
...@@ -3753,7 +3753,7 @@ void ConvolutionComponent::Init(BaseFloat learning_rate,
// resize the component, setting the parameters to zero, while
// leaving any other configuration values the same
void Convolutional1dComponent::Resize(int32 input_dim, int32 output_dim) {
  KALDI_ASSERT(input_dim > 0 && output_dim > 0);
  int32 num_splice = input_dim / patch_stride_;
  int32 filter_dim = num_splice * patch_dim_;
...@@ -3767,7 +3767,7 @@ void ConvolutionComponent::Resize(int32 input_dim, int32 output_dim) {
}
// display information about component
std::string Convolutional1dComponent::Info() const {
  std::stringstream stream;
  BaseFloat filter_params_size = static_cast<BaseFloat>(filter_params_.NumRows())
      * static_cast<BaseFloat>(filter_params_.NumCols());
...@@ -3795,7 +3795,7 @@ std::string ConvolutionComponent::Info() const {
}
// initialize the component using configuration file
void Convolutional1dComponent::InitFromString(std::string args) {
  std::string orig_args(args);
  bool ok = true;
  BaseFloat learning_rate = learning_rate_;
...@@ -3832,10 +3832,34 @@ void ConvolutionComponent::InitFromString(std::string args) {
}
// propagation function

/* Convolutional propagation is explained as follows:
   - Recall the AffineComponent: input X is defined #frames x $input-dim,
     the linear matrix A is defined $output-dim x $input-dim, and the bias
     vector B is defined by length $output-dim. The propagation is
        Y = X * A' + B                                 (1)
     where "*" is row-by-row processing of X, executing the vector-matrix
     multiplication
        Y(t) = X(t) * A' + B                           (2)
     which converts each row of input of dim $input-dim to a row of output
     of dim $output-dim by A' (' denotes transpose).
   - In Convolutional1dComponent, A is redefined $num-filters x $filter-dim,
     and the bias vector B is redefined by length $num-filters. The
     propagation is
        Y = X o A' + B                                 (3)
     where "o" is also row-by-row processing of X, but executing vector-matrix
     convolution, which consists of a group of vector-vector convolutions.
     For instance, the convolution of X(t) and the i-th filter A(i) is
        Y(t,i) = X(t) o A'(i) + B(i)                   (4)
     The convolution used here is valid convolution, meaning that the output
     of M o N has dim |M| - |N| + 1, assuming M is not shorter than N.
     Note that in all the equations, B is extended to the proper dimensions
     for legal addition. (A standalone numeric sketch of eq. (4) is given
     after this function.)
*/
void Convolutional1dComponent::Propagate(const ChunkInfo &in_info,
                                         const ChunkInfo &out_info,
                                         const CuMatrixBase<BaseFloat> &in,
                                         CuMatrixBase<BaseFloat> *out) const {
  in_info.CheckSize(in);
  out_info.CheckSize(*out);
  KALDI_ASSERT(in_info.NumChunks() == out_info.NumChunks());
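As a complement to equations (3)-(4) above, here is a small self-contained C++ sketch of one row of the valid convolution, using plain std::vector in place of Kaldi's CuMatrixBase and ignoring splicing and chunk bookkeeping. The patch-major output layout is an assumption consistent with MaxpoolingComponent's pool-stride=num-filters, and, as in the comment, no kernel flip is performed:

// conv1d_sketch.cc -- numeric illustration of Y(t,i) = X(t) o A'(i) + B(i).
#include <cstdio>
#include <vector>

int main() {
  const int patch_dim = 4, patch_step = 1, patch_stride = 10;  // one splice
  const int num_filters = 2;
  const int num_patches = 1 + (patch_stride - patch_dim) / patch_step;  // 7

  std::vector<double> x(patch_stride, 1.0);  // one input row X(t)
  std::vector<std::vector<double> > a(       // filter rows A(i)
      num_filters, std::vector<double>(patch_dim, 0.5));
  std::vector<double> b(num_filters, 0.1);   // bias B

  // Valid convolution: |X(t) o A(i)| = |X(t)| - |A(i)| + 1 = 7 entries.
  std::vector<double> y(num_patches * num_filters);
  for (int i = 0; i < num_filters; i++) {
    for (int p = 0; p < num_patches; p++) {
      double sum = b[i];
      for (int d = 0; d < patch_dim; d++)
        sum += x[p * patch_step + d] * a[i][d];
      // Assumed patch-major layout: the entries for patch p are contiguous
      // across filters, so filter i is found at stride num_filters.
      y[p * num_filters + i] = sum;
    }
  }
  for (size_t k = 0; k < y.size(); k++)
    printf("%g ", y[k]);                     // each entry: 0.1 + 4*0.5 = 2.1
  printf("\n");
  return 0;
}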
...@@ -3885,30 +3909,30 @@ void ConvolutionComponent::Propagate(const ChunkInfo &in_info,
}
// scale the parameters
void Convolutional1dComponent::Scale(BaseFloat scale) {
  filter_params_.Scale(scale);
  bias_params_.Scale(scale);
}
// add another convolution component
void Convolutional1dComponent::Add(BaseFloat alpha, const UpdatableComponent &other_in) {
  const Convolutional1dComponent *other =
      dynamic_cast<const Convolutional1dComponent*>(&other_in);
  KALDI_ASSERT(other != NULL);
  filter_params_.AddMat(alpha, other->filter_params_);
  bias_params_.AddVec(alpha, other->bias_params_);
}
// back propagation function
void Convolutional1dComponent::Backprop(const ChunkInfo &in_info,
                                        const ChunkInfo &out_info,
                                        const CuMatrixBase<BaseFloat> &in_value,
                                        const CuMatrixBase<BaseFloat> &out_value,
                                        const CuMatrixBase<BaseFloat> &out_deriv,
                                        Component *to_update_in,
                                        CuMatrix<BaseFloat> *in_deriv) const {
  in_deriv->Resize(out_deriv.NumRows(), InputDim());
  Convolutional1dComponent *to_update = dynamic_cast<Convolutional1dComponent*>(to_update_in);
  int32 num_splice = InputDim() / patch_stride_;
  int32 num_patches = 1 + (patch_stride_ - patch_dim_) / patch_step_;
  int32 num_filters = filter_params_.NumRows();
...@@ -3952,7 +3976,7 @@ void ConvolutionComponent::Backprop(const ChunkInfo &in_info,
  }
}
void Convolutional1dComponent::SetZero(bool treat_as_gradient) {
  if (treat_as_gradient) {
    SetLearningRate(1.0);
  }
...@@ -3963,11 +3987,11 @@ void ConvolutionComponent::SetZero(bool treat_as_gradient) {
  }
}
void Convolutional1dComponent::Read(std::istream &is, bool binary) {
  std::ostringstream ostr_beg, ostr_end;
  ostr_beg << "<" << Type() << ">"; // e.g. "<Convolutional1dComponent>"
  ostr_end << "</" << Type() << ">"; // e.g. "</Convolutional1dComponent>"
  // might not see the "<Convolutional1dComponent>" part because
  // of how ReadNew() works.
  ExpectOneOrTwoTokens(is, binary, ostr_beg.str(), "<LearningRate>");
  ReadBasicType(is, binary, &learning_rate_);
...@@ -3992,10 +4016,10 @@ void ConvolutionComponent::Read(std::istream &is, bool binary) {
  }
}
void Convolutional1dComponent::Write(std::ostream &os, bool binary) const {
  std::ostringstream ostr_beg, ostr_end;
  ostr_beg << "<" << Type() << ">"; // e.g. "<Convolutional1dComponent>"
  ostr_end << "</" << Type() << ">"; // e.g. "</Convolutional1dComponent>"
  WriteToken(os, binary, ostr_beg.str());
  WriteToken(os, binary, "<LearningRate>");
  WriteBasicType(os, binary, learning_rate_);
...@@ -4014,15 +4038,15 @@ void ConvolutionComponent::Write(std::ostream &os, bool binary) const {
  WriteToken(os, binary, ostr_end.str());
}
BaseFloat Convolutional1dComponent::DotProduct(const UpdatableComponent &other_in) const {
  const Convolutional1dComponent *other =
      dynamic_cast<const Convolutional1dComponent*>(&other_in);
  return TraceMatMat(filter_params_, other->filter_params_, kTrans)
      + VecVec(bias_params_, other->bias_params_);
}
Component* Convolutional1dComponent::Copy() const {
  Convolutional1dComponent *ans = new Convolutional1dComponent();
  ans->learning_rate_ = learning_rate_;
  ans->patch_dim_ = patch_dim_;
  ans->patch_step_ = patch_step_;
...@@ -4033,7 +4057,7 @@ Component* ConvolutionComponent::Copy() const {
  return ans;
}
void Convolutional1dComponent::PerturbParams(BaseFloat stddev) {
  CuMatrix<BaseFloat> temp_filter_params(filter_params_);
  temp_filter_params.SetRandn();
  filter_params_.AddMat(stddev, temp_filter_params);
...@@ -4043,20 +4067,20 @@ void ConvolutionComponent::PerturbParams(BaseFloat stddev) {
  bias_params_.AddVec(stddev, temp_bias_params);
}
void Convolutional1dComponent::SetParams(const VectorBase<BaseFloat> &bias,
                                         const MatrixBase<BaseFloat> &filter) {