Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Open sidebar
Abdelwahab HEBA
kaldi_2015
Commits
9d4b994f
Commit
9d4b994f
authored
Aug 07, 2015
by
naxingyu
Browse files
add more doc
parent
885586f9
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
161 additions
and
86 deletions
+161
-86
egs/hkust/s5/RESULTS
egs/hkust/s5/RESULTS
+3
-0
egs/hkust/s5/local/nnet2/run_convnet.sh
egs/hkust/s5/local/nnet2/run_convnet.sh
+14
-4
egs/wsj/s5/steps/nnet2/train_convnet_accel2.sh
egs/wsj/s5/steps/nnet2/train_convnet_accel2.sh
+22
-10
src/nnet2/nnet-component-test.cc
src/nnet2/nnet-component-test.cc
+4
-4
src/nnet2/nnet-component.cc
src/nnet2/nnet-component.cc
+77
-53
src/nnet2/nnet-component.h
src/nnet2/nnet-component.h
+41
-15
No files found.
egs/hkust/s5/RESULTS
View file @
9d4b994f
...
...
@@ -7,3 +7,6 @@ exp/tri5a/decode/cer_13:%WER 49.67 [ 27891 / 56154, 2877 ins, 4538 del, 20476 su
exp/tri5a_mce/decode/cer_11:%WER 44.74 [ 25125 / 56154, 2112 ins, 4108 del, 18905 sub ]
exp/tri5a_mmi_b0.1/decode/cer_11:%WER 44.24 [ 24840 / 56154, 2060 ins, 4118 del, 18662 sub ]
exp/tri5a_mpe/decode/cer_12:%WER 44.96 [ 25247 / 56154, 2233 ins, 4174 del, 18840 sub ]
# ConvNet with 2 convolutional layers and 2 ReLU layers
exp/nnet2_convnet/decode/cer_10:%WER 40.73 [ 22873 / 56154, 2609 ins, 3712 del, 16552 sub ]
egs/hkust/s5/local/nnet2/run_convnet.sh
View file @
9d4b994f
#!/bin/bash
# 2015 Xingyu Na
# This runs on the full training set, using ConvNet setup with
# Sigmoid affine layers, on top of fbank features, on GPU.
# This script runs on the full training set, using ConvNet setup on top of
# fbank features, on GPU. The ConvNet has four hidden layers, two convolutional
# layers and two affine transform layers with ReLU nonlinearity.
# Convolutional layer [1]:
# convolution1d, input feature dim is 36, filter dim is 7, output dim is
# 30, 128 filters are used
# maxpooling, 3-to-1 maxpooling, input dim is 30, output dim is 10
# Convolutional layer [2]:
# convolution1d, input feature dim is 10, filter dim is 4, output dim is
# 7, 256 filters are used
# Affine transform layers [3-4]:
# affine transform with ReLU nonlinearity.
temp_dir
=
dir
=
exp/nnet2_convnet
...
...
@@ -16,7 +26,7 @@ train=data-fb/train
.
utils/parse_options.sh
parallel_opts
=
"--gpu 1"
# This is suitable for the CLSP network, you'll
# likely have to change it.
# likely have to change it.
# Make the FBANK features
if
[
$stage
-le
-5
]
;
then
...
...
@@ -40,7 +50,7 @@ fi
--mix-up
20000
--samples-per-iter
300000
\
--num-epochs
15
--delta-order
2
\
--initial-effective-lrate
0.0005
--final-effective-lrate
0.000025
\
--num-jobs-initial
3
--num-jobs-final
8
--num-hidden-layers
4
--splice-width
5
\
--num-jobs-initial
3
--num-jobs-final
8
--splice-width
5
\
--hidden-dim
2000
--num-filters1
128
--patch-dim1
7
--pool-size
3
\
--num-filters2
256
--patch-dim2
4
\
$train
data/lang exp/tri5a_ali
$dir
||
exit
1
;
...
...
egs/wsj/s5/steps/nnet2/train_convnet_accel2.sh
View file @
9d4b994f
...
...
@@ -4,10 +4,14 @@
# 2013 Xiaohui Zhang
# 2013 Guoguo Chen
# 2014 Vimal Manohar
# 2015 Xingyu Na
# Apache 2.0.
# train_convnet_accel2.sh is modified from train_pnorm_accel2.sh
# train_convnet_accel2.sh is modified from train_pnorm_accel2.sh. It prototypes
# the training of a ConvNet. The ConvNet is composed of 4 layers. The first layer
# is a Convolutional1d component plus a Maxpooling component. The second layer
# is a single Convolutional1d component. The third and fourth layers are affine
# components with ReLU nonlinearities. Due to non-squashing output, normalize
# component is applied to all four layers.
# train_pnorm_accel2.sh is a modified form of train_pnorm_simple2.sh (the "2"
# suffix is because they both use the "new" egs format, created by
...
...
@@ -61,8 +65,7 @@ shuffle_buffer_size=5000 # This "buffer_size" variable controls randomization of
# affect each others' gradients.
add_layers_period
=
2
# by default, add new layers every 2 iterations.
num_hidden_layers
=
3
stage
=
-4
stage
=
-3
splice_width
=
4
# meaning +- 4 frames on each side for second LDA
left_context
=
# if set, overrides splice-width
...
...
@@ -129,7 +132,6 @@ if [ $# != 4 ]; then
echo
" --initial-effective-lrate <lrate|0.02> # effective learning rate at start of training,"
echo
" # actual learning-rate is this time num-jobs."
echo
" --final-effective-lrate <lrate|0.004> # effective learning rate at end of training."
echo
" --num-hidden-layers <#hidden-layers|2> # Number of hidden layers, e.g. 2 for 3 hours of data, 4 for 100hrs"
echo
" --add-layers-period <#iters|2> # Number of iterations between adding hidden layers"
echo
" --mix-up <#pseudo-gaussians|0> # Can be used to have multiple targets in final output layer,"
echo
" # per context-dependent state. Try a number several times #states."
...
...
@@ -148,7 +150,6 @@ if [ $# != 4 ]; then
echo
" # process."
echo
" --splice-width <width|4> # Number of frames on each side to append for feature input"
echo
" # (note: we splice processed, typically 40-dimensional frames"
echo
" --lda-dim <dim|250> # Dimension to reduce spliced features to with LDA"
echo
" --realign-epochs <list-of-epochs|
\"\"
> # A list of space-separated epoch indices the beginning of which"
echo
" # realignment is to be done"
echo
" --align-cmd (utils/run.pl|utils/queue.pl <queue opts>) # passed to align.sh"
...
...
@@ -156,6 +157,15 @@ if [ $# != 4 ]; then
echo
" --num-jobs-align <#njobs|30> # Number of jobs to perform realignment"
echo
" --stage <stage|-4> # Used to run a partially-completed training process from somewhere in"
echo
" # the middle."
echo
"ConvNet configurations"
echo
" --num-filters1 <num-filters1|128> # number of filters in the first convolutional layer."
echo
" --patch-step1 <patch-step1|1> # patch step of the first convolutional layer."
echo
" --patch-dim1 <patch-dim1|7> # dim of convolutional kernel in the first layer."
echo
" # (note: (feat-dim - patch-dim1) % patch-step1 should be 0.)"
echo
" --pool-size <pool-size|3> # size of pooling after the first convolutional layer."
echo
" # (note: (feat-dim - patch-dim1 + 1) % pool-size should be 0.)"
echo
" --num-filters2 <num-filters2|256> # number of filters in the second convolutional layer."
echo
" --patch-dim2 <patch-dim2|4> # dim of convolutional kernel in the second layer."
exit
1
;
...
...
@@ -266,7 +276,7 @@ if [ $stage -le -2 ]; then
stddev
=
`
perl
-e
"print 1.0/sqrt(
$hidden_dim
);"
`
cat
>
$dir
/nnet.config
<<
EOF
SpliceComponent input-dim=
$delta_feat_dim
left-context=
$left_context
right-context=
$right_context
ConvolutionComponent input-dim=
$tot_input_dim
output-dim=
$conv_out_dim1
learning-rate=
$initial_lrate
param-stddev=
$stddev
bias-stddev=
$bias_stddev
patch-dim=
$patch_dim1
patch-step=
$patch_step1
patch-stride=
$feat_dim
Convolution
al1d
Component input-dim=
$tot_input_dim
output-dim=
$conv_out_dim1
learning-rate=
$initial_lrate
param-stddev=
$stddev
bias-stddev=
$bias_stddev
patch-dim=
$patch_dim1
patch-step=
$patch_step1
patch-stride=
$feat_dim
MaxpoolingComponent input-dim=
$conv_out_dim1
output-dim=
$pool_out_dim
pool-size=
$pool_size
pool-stride=
$num_filters1
NormalizeComponent dim=
$pool_out_dim
AffineComponentPreconditionedOnline input-dim=
$pool_out_dim
output-dim=
$num_leaves
$online_preconditioning_opts
learning-rate=
$initial_lrate
param-stddev=0 bias-stddev=0
...
...
@@ -274,7 +284,7 @@ SoftmaxComponent dim=$num_leaves
EOF
cat
>
$dir
/replace.1.config
<<
EOF
ConvolutionComponent input-dim=
$pool_out_dim
output-dim=
$conv_out_dim2
learning-rate=
$initial_lrate
param-stddev=
$stddev
bias-stddev=
$bias_stddev
patch-dim=
$patch_dim2
patch-step=
$patch_step2
patch-stride=
$patch_stride2
Convolution
al1d
Component input-dim=
$pool_out_dim
output-dim=
$conv_out_dim2
learning-rate=
$initial_lrate
param-stddev=
$stddev
bias-stddev=
$bias_stddev
patch-dim=
$patch_dim2
patch-step=
$patch_step2
patch-stride=
$patch_stride2
NormalizeComponent dim=
$conv_out_dim2
AffineComponentPreconditionedOnline input-dim=
$conv_out_dim2
output-dim=
$num_leaves
$online_preconditioning_opts
learning-rate=
$initial_lrate
param-stddev=0 bias-stddev=0
SoftmaxComponent dim=
$num_leaves
...
...
@@ -282,7 +292,8 @@ EOF
cat
>
$dir
/replace.2.config
<<
EOF
AffineComponentPreconditionedOnline input-dim=
$conv_out_dim2
output-dim=
$hidden_dim
$online_preconditioning_opts
learning-rate=
$initial_lrate
param-stddev=
$stddev
bias-stddev=
$bias_stddev
SigmoidComponent dim=
$hidden_dim
RectifiedLinearComponent dim=
$hidden_dim
NormalizeComponent dim=
$hidden_dim
AffineComponentPreconditionedOnline input-dim=
$hidden_dim
output-dim=
$num_leaves
$online_preconditioning_opts
learning-rate=
$initial_lrate
param-stddev=0 bias-stddev=0
SoftmaxComponent dim=
$num_leaves
EOF
...
...
@@ -291,7 +302,8 @@ EOF
# single hidden layer; we need this to add new layers.
cat
>
$dir
/replace.3.config
<<
EOF
AffineComponentPreconditionedOnline input-dim=
$hidden_dim
output-dim=
$hidden_dim
$online_preconditioning_opts
learning-rate=
$initial_lrate
param-stddev=
$stddev
bias-stddev=
$bias_stddev
SigmoidComponent dim=
$hidden_dim
RectifiedLinearComponent dim=
$hidden_dim
NormalizeComponent dim=
$hidden_dim
AffineComponentPreconditionedOnline input-dim=
$hidden_dim
output-dim=
$num_leaves
$online_preconditioning_opts
learning-rate=
$initial_lrate
param-stddev=0 bias-stddev=0
SoftmaxComponent dim=
$num_leaves
EOF
...
...
src/nnet2/nnet-component-test.cc
View file @
9d4b994f
...
...
@@ -362,7 +362,7 @@ void UnitTestAffineComponent() {
}
}
void
UnitTestConvolutionComponent
()
{
void
UnitTestConvolution
al1d
Component
()
{
BaseFloat
learning_rate
=
0.01
,
param_stddev
=
0.1
,
bias_stddev
=
1.0
;
int32
patch_stride
=
10
,
patch_step
=
1
,
patch_dim
=
4
;
...
...
@@ -372,7 +372,7 @@ void UnitTestConvolutionComponent() {
int32
filter_dim
=
patch_dim
*
num_splice
;
int32
output_dim
=
num_patches
*
num_filters
;
{
ConvolutionComponent
component
;
Convolution
al1d
Component
component
;
if
(
Rand
()
%
2
==
0
)
{
component
.
Init
(
learning_rate
,
input_dim
,
output_dim
,
patch_dim
,
patch_step
,
patch_stride
,
...
...
@@ -394,7 +394,7 @@ void UnitTestConvolutionComponent() {
}
{
const
char
*
str
=
"learning-rate=0.01 input-dim=100 output-dim=70 param-stddev=0.1 patch-dim=4 patch-step=1 patch-stride=10"
;
ConvolutionComponent
component
;
Convolution
al1d
Component
component
;
component
.
InitFromString
(
str
);
UnitTestGenericComponentInternal
(
component
);
}
...
...
@@ -890,7 +890,7 @@ int main() {
UnitTestFixedBiasComponent
();
UnitTestAffineComponentPreconditioned
();
UnitTestAffineComponentPreconditionedOnline
();
UnitTestConvolutionComponent
();
UnitTestConvolution
al1d
Component
();
UnitTestDropoutComponent
();
UnitTestAdditiveNoiseComponent
();
UnitTestParsing
();
...
...
src/nnet2/nnet-component.cc
View file @
9d4b994f
...
...
@@ -102,8 +102,8 @@ Component* Component::NewComponentOfType(const std::string &component_type) {
ans
=
new
DropoutComponent
();
}
else
if
(
component_type
==
"AdditiveNoiseComponent"
)
{
ans
=
new
AdditiveNoiseComponent
();
}
else
if
(
component_type
==
"ConvolutionComponent"
)
{
ans
=
new
ConvolutionComponent
();
}
else
if
(
component_type
==
"Convolution
al1d
Component"
)
{
ans
=
new
Convolution
al1d
Component
();
}
else
if
(
component_type
==
"MaxpoolingComponent"
)
{
ans
=
new
MaxpoolingComponent
();
}
...
...
@@ -3676,19 +3676,19 @@ void AdditiveNoiseComponent::Propagate(const ChunkInfo &in_info,
out
->
AddMat
(
stddev_
,
rand
);
}
ConvolutionComponent
::
ConvolutionComponent
()
:
Convolution
al1d
Component
::
Convolution
al1d
Component
()
:
UpdatableComponent
(),
patch_dim_
(
0
),
patch_step_
(
0
),
patch_stride_
(
0
),
is_gradient_
(
false
)
{}
ConvolutionComponent
::
ConvolutionComponent
(
const
ConvolutionComponent
&
component
)
:
Convolution
al1d
Component
::
Convolution
al1d
Component
(
const
Convolution
al1d
Component
&
component
)
:
UpdatableComponent
(
component
),
filter_params_
(
component
.
filter_params_
),
bias_params_
(
component
.
bias_params_
),
is_gradient_
(
component
.
is_gradient_
)
{}
ConvolutionComponent
::
ConvolutionComponent
(
const
CuMatrixBase
<
BaseFloat
>
&
filter_params
,
const
CuVectorBase
<
BaseFloat
>
&
bias_params
,
BaseFloat
learning_rate
)
:
Convolution
al1d
Component
::
Convolution
al1d
Component
(
const
CuMatrixBase
<
BaseFloat
>
&
filter_params
,
const
CuVectorBase
<
BaseFloat
>
&
bias_params
,
BaseFloat
learning_rate
)
:
UpdatableComponent
(
learning_rate
),
filter_params_
(
filter_params
),
bias_params_
(
bias_params
)
{
...
...
@@ -3698,24 +3698,24 @@ ConvolutionComponent::ConvolutionComponent(const CuMatrixBase<BaseFloat> &filter
}
// acquire input dim
int32
ConvolutionComponent
::
InputDim
()
const
{
int32
Convolution
al1d
Component
::
InputDim
()
const
{
int32
filter_dim
=
filter_params_
.
NumCols
();
int32
num_splice
=
filter_dim
/
patch_dim_
;
return
patch_stride_
*
num_splice
;
}
// acquire output dim
int32
ConvolutionComponent
::
OutputDim
()
const
{
int32
Convolution
al1d
Component
::
OutputDim
()
const
{
int32
num_filters
=
filter_params_
.
NumRows
();
int32
num_patches
=
1
+
(
patch_stride_
-
patch_dim_
)
/
patch_step_
;
return
num_patches
*
num_filters
;
}
// initialize the component using hyperparameters
void
ConvolutionComponent
::
Init
(
BaseFloat
learning_rate
,
int32
input_dim
,
int32
output_dim
,
int32
patch_dim
,
int32
patch_step
,
int32
patch_stride
,
BaseFloat
param_stddev
,
BaseFloat
bias_stddev
)
{
void
Convolution
al1d
Component
::
Init
(
BaseFloat
learning_rate
,
int32
input_dim
,
int32
output_dim
,
int32
patch_dim
,
int32
patch_step
,
int32
patch_stride
,
BaseFloat
param_stddev
,
BaseFloat
bias_stddev
)
{
UpdatableComponent
::
Init
(
learning_rate
);
patch_dim_
=
patch_dim
;
patch_step_
=
patch_step
;
...
...
@@ -3738,8 +3738,8 @@ void ConvolutionComponent::Init(BaseFloat learning_rate,
}
// initialize the component using predefined matrix file
void
ConvolutionComponent
::
Init
(
BaseFloat
learning_rate
,
std
::
string
matrix_filename
)
{
void
Convolution
al1d
Component
::
Init
(
BaseFloat
learning_rate
,
std
::
string
matrix_filename
)
{
UpdatableComponent
::
Init
(
learning_rate
);
CuMatrix
<
BaseFloat
>
mat
;
ReadKaldiObject
(
matrix_filename
,
&
mat
);
...
...
@@ -3753,7 +3753,7 @@ void ConvolutionComponent::Init(BaseFloat learning_rate,
// resize the component, setting the parameters to zero, while
// leaving any other configuration values the same
void
ConvolutionComponent
::
Resize
(
int32
input_dim
,
int32
output_dim
)
{
void
Convolution
al1d
Component
::
Resize
(
int32
input_dim
,
int32
output_dim
)
{
KALDI_ASSERT
(
input_dim
>
0
&&
output_dim
>
0
);
int32
num_splice
=
input_dim
/
patch_stride_
;
int32
filter_dim
=
num_splice
*
patch_dim_
;
...
...
@@ -3767,7 +3767,7 @@ void ConvolutionComponent::Resize(int32 input_dim, int32 output_dim) {
}
// display information about component
std
::
string
ConvolutionComponent
::
Info
()
const
{
std
::
string
Convolution
al1d
Component
::
Info
()
const
{
std
::
stringstream
stream
;
BaseFloat
filter_params_size
=
static_cast
<
BaseFloat
>
(
filter_params_
.
NumRows
())
*
static_cast
<
BaseFloat
>
(
filter_params_
.
NumCols
());
...
...
@@ -3795,7 +3795,7 @@ std::string ConvolutionComponent::Info() const {
}
// initialize the component using configuration file
void
ConvolutionComponent
::
InitFromString
(
std
::
string
args
)
{
void
Convolution
al1d
Component
::
InitFromString
(
std
::
string
args
)
{
std
::
string
orig_args
(
args
);
bool
ok
=
true
;
BaseFloat
learning_rate
=
learning_rate_
;
...
...
@@ -3832,10 +3832,34 @@ void ConvolutionComponent::InitFromString(std::string args) {
}
// propagation function
void
ConvolutionComponent
::
Propagate
(
const
ChunkInfo
&
in_info
,
const
ChunkInfo
&
out_info
,
const
CuMatrixBase
<
BaseFloat
>
&
in
,
CuMatrixBase
<
BaseFloat
>
*
out
)
const
{
/* Convolutional propagation is explained:
- Recall the AffineComponent, input X is defined #frames x $input-dim,
linear matrix A is defined $output-dim x $input-dim, and bias
vector B is defined by length $output-dim. The propagation is
Y = X * A' + B (1)
where "*" is row-by-row processing of X, executing vector-matrix
multiplication
Y(t) = X(t) * A' + B (2)
which converts each row of input of dim $input-dim to a row of output of
dim $output-dim by A' (' defines transpose).
- In Convolutional1dComponent, A is redefined $num-filters x $filter-dim,
and bias vector B is redefined by length $num-filters. The propagation is
Y = X o A' + B (3)
where "o" is also row-by-row processing of X, but executing vector-matrix
convolution, which consists of a group of vector-vector convolutions.
For instance, the convolution of X(t) and the i-th filter A(i) is
Y(t,i) = X(t) o A'(i) + B(i) (4)
The convolution used here is valid convolution. Meaning that the
output of M o N is of dim |M| - |N| + 1, assuming M is not shorter than N.
Note that in all the equations, B is extended to proper dimensions
for legal addition.
*/
void
Convolutional1dComponent
::
Propagate
(
const
ChunkInfo
&
in_info
,
const
ChunkInfo
&
out_info
,
const
CuMatrixBase
<
BaseFloat
>
&
in
,
CuMatrixBase
<
BaseFloat
>
*
out
)
const
{
in_info
.
CheckSize
(
in
);
out_info
.
CheckSize
(
*
out
);
KALDI_ASSERT
(
in_info
.
NumChunks
()
==
out_info
.
NumChunks
());
...
...
@@ -3885,30 +3909,30 @@ void ConvolutionComponent::Propagate(const ChunkInfo &in_info,
}
// scale the parameters
void
ConvolutionComponent
::
Scale
(
BaseFloat
scale
)
{
void
Convolution
al1d
Component
::
Scale
(
BaseFloat
scale
)
{
filter_params_
.
Scale
(
scale
);
bias_params_
.
Scale
(
scale
);
}
// add another convolution component
void
ConvolutionComponent
::
Add
(
BaseFloat
alpha
,
const
UpdatableComponent
&
other_in
)
{
const
ConvolutionComponent
*
other
=
dynamic_cast
<
const
ConvolutionComponent
*>
(
&
other_in
);
void
Convolution
al1d
Component
::
Add
(
BaseFloat
alpha
,
const
UpdatableComponent
&
other_in
)
{
const
Convolution
al1d
Component
*
other
=
dynamic_cast
<
const
Convolution
al1d
Component
*>
(
&
other_in
);
KALDI_ASSERT
(
other
!=
NULL
);
filter_params_
.
AddMat
(
alpha
,
other
->
filter_params_
);
bias_params_
.
AddVec
(
alpha
,
other
->
bias_params_
);
}
// back propagation function
void
ConvolutionComponent
::
Backprop
(
const
ChunkInfo
&
in_info
,
const
ChunkInfo
&
out_info
,
const
CuMatrixBase
<
BaseFloat
>
&
in_value
,
const
CuMatrixBase
<
BaseFloat
>
&
out_value
,
const
CuMatrixBase
<
BaseFloat
>
&
out_deriv
,
Component
*
to_update_in
,
CuMatrix
<
BaseFloat
>
*
in_deriv
)
const
{
void
Convolution
al1d
Component
::
Backprop
(
const
ChunkInfo
&
in_info
,
const
ChunkInfo
&
out_info
,
const
CuMatrixBase
<
BaseFloat
>
&
in_value
,
const
CuMatrixBase
<
BaseFloat
>
&
out_value
,
const
CuMatrixBase
<
BaseFloat
>
&
out_deriv
,
Component
*
to_update_in
,
CuMatrix
<
BaseFloat
>
*
in_deriv
)
const
{
in_deriv
->
Resize
(
out_deriv
.
NumRows
(),
InputDim
());
ConvolutionComponent
*
to_update
=
dynamic_cast
<
ConvolutionComponent
*>
(
to_update_in
);
Convolution
al1d
Component
*
to_update
=
dynamic_cast
<
Convolution
al1d
Component
*>
(
to_update_in
);
int32
num_splice
=
InputDim
()
/
patch_stride_
;
int32
num_patches
=
1
+
(
patch_stride_
-
patch_dim_
)
/
patch_step_
;
int32
num_filters
=
filter_params_
.
NumRows
();
...
...
@@ -3952,7 +3976,7 @@ void ConvolutionComponent::Backprop(const ChunkInfo &in_info,
}
}
void
ConvolutionComponent
::
SetZero
(
bool
treat_as_gradient
)
{
void
Convolution
al1d
Component
::
SetZero
(
bool
treat_as_gradient
)
{
if
(
treat_as_gradient
)
{
SetLearningRate
(
1.0
);
}
...
...
@@ -3963,11 +3987,11 @@ void ConvolutionComponent::SetZero(bool treat_as_gradient) {
}
}
void
ConvolutionComponent
::
Read
(
std
::
istream
&
is
,
bool
binary
)
{
void
Convolution
al1d
Component
::
Read
(
std
::
istream
&
is
,
bool
binary
)
{
std
::
ostringstream
ostr_beg
,
ostr_end
;
ostr_beg
<<
"<"
<<
Type
()
<<
">"
;
// e.g. "<ConvolutionComponent>"
ostr_end
<<
"</"
<<
Type
()
<<
">"
;
// e.g. "</ConvolutionComponent>"
// might not see the "<ConvolutionComponent>" part because
ostr_beg
<<
"<"
<<
Type
()
<<
">"
;
// e.g. "<Convolution
al1d
Component>"
ostr_end
<<
"</"
<<
Type
()
<<
">"
;
// e.g. "</Convolution
al1d
Component>"
// might not see the "<Convolution
al1d
Component>" part because
// of how ReadNew() works.
ExpectOneOrTwoTokens
(
is
,
binary
,
ostr_beg
.
str
(),
"<LearningRate>"
);
ReadBasicType
(
is
,
binary
,
&
learning_rate_
);
...
...
@@ -3992,10 +4016,10 @@ void ConvolutionComponent::Read(std::istream &is, bool binary) {
}
}
void
ConvolutionComponent
::
Write
(
std
::
ostream
&
os
,
bool
binary
)
const
{
void
Convolution
al1d
Component
::
Write
(
std
::
ostream
&
os
,
bool
binary
)
const
{
std
::
ostringstream
ostr_beg
,
ostr_end
;
ostr_beg
<<
"<"
<<
Type
()
<<
">"
;
// e.g. "<ConvolutionComponent>"
ostr_end
<<
"</"
<<
Type
()
<<
">"
;
// e.g. "</ConvolutionComponent>"
ostr_beg
<<
"<"
<<
Type
()
<<
">"
;
// e.g. "<Convolution
al1d
Component>"
ostr_end
<<
"</"
<<
Type
()
<<
">"
;
// e.g. "</Convolution
al1d
Component>"
WriteToken
(
os
,
binary
,
ostr_beg
.
str
());
WriteToken
(
os
,
binary
,
"<LearningRate>"
);
WriteBasicType
(
os
,
binary
,
learning_rate_
);
...
...
@@ -4014,15 +4038,15 @@ void ConvolutionComponent::Write(std::ostream &os, bool binary) const {
WriteToken
(
os
,
binary
,
ostr_end
.
str
());
}
BaseFloat
ConvolutionComponent
::
DotProduct
(
const
UpdatableComponent
&
other_in
)
const
{
const
ConvolutionComponent
*
other
=
dynamic_cast
<
const
ConvolutionComponent
*>
(
&
other_in
);
BaseFloat
Convolution
al1d
Component
::
DotProduct
(
const
UpdatableComponent
&
other_in
)
const
{
const
Convolution
al1d
Component
*
other
=
dynamic_cast
<
const
Convolution
al1d
Component
*>
(
&
other_in
);
return
TraceMatMat
(
filter_params_
,
other
->
filter_params_
,
kTrans
)
+
VecVec
(
bias_params_
,
other
->
bias_params_
);
}
Component
*
ConvolutionComponent
::
Copy
()
const
{
ConvolutionComponent
*
ans
=
new
ConvolutionComponent
();
Component
*
Convolution
al1d
Component
::
Copy
()
const
{
Convolution
al1d
Component
*
ans
=
new
Convolution
al1d
Component
();
ans
->
learning_rate_
=
learning_rate_
;
ans
->
patch_dim_
=
patch_dim_
;
ans
->
patch_step_
=
patch_step_
;
...
...
@@ -4033,7 +4057,7 @@ Component* ConvolutionComponent::Copy() const {
return
ans
;
}
void
ConvolutionComponent
::
PerturbParams
(
BaseFloat
stddev
)
{
void
Convolution
al1d
Component
::
PerturbParams
(
BaseFloat
stddev
)
{
CuMatrix
<
BaseFloat
>
temp_filter_params
(
filter_params_
);
temp_filter_params
.
SetRandn
();
filter_params_
.
AddMat
(
stddev
,
temp_filter_params
);
...
...
@@ -4043,20 +4067,20 @@ void ConvolutionComponent::PerturbParams(BaseFloat stddev) {
bias_params_
.
AddVec
(
stddev
,
temp_bias_params
);
}
void
ConvolutionComponent
::
SetParams
(
const
VectorBase
<
BaseFloat
>
&
bias
,
const
MatrixBase
<
BaseFloat
>
&
filter
)
{
void
Convolution
al1d
Component
::
SetParams
(
const
VectorBase
<
BaseFloat
>
&
bias
,
const
MatrixBase
<
BaseFloat
>
&
filter
)
{
bias_params_
=
bias
;
filter_params_
=
filter
;
KALDI_ASSERT
(
bias_params_
.
Dim
()
==
filter_params_
.
NumRows
());
}
int32
ConvolutionComponent
::
GetParameterDim
()
const
{
int32
Convolution
al1d
Component
::
GetParameterDim
()
const
{
return
(
filter_params_
.
NumCols
()
+
1
)
*
filter_params_
.
NumRows
();
}
// update parameters
void
ConvolutionComponent
::
Update
(
const
CuMatrixBase
<
BaseFloat
>
&
in_value
,
const
CuMatrixBase
<
BaseFloat
>
&
out_deriv
)
{
void
Convolution
al1d
Component
::
Update
(
const
CuMatrixBase
<
BaseFloat
>
&
in_value
,
const
CuMatrixBase
<
BaseFloat
>
&
out_deriv
)
{
// useful dims
int32
num_patches
=
1
+
(
patch_stride_
-
patch_dim_
)
/
patch_step_
;
int32
num_filters
=
filter_params_
.
NumRows
();
...
...
src/nnet2/nnet-component.h
View file @
9d4b994f
...
...
@@ -450,8 +450,18 @@ class MaxoutComponent: public Component {
/**
* MaxpoolingComponent :
* Maxpooling component was first used in ConvNet for selecting a representative
* activation in an area. It inspired Maxout nonlinearity.
*
* The input/output matrices are split to submatrices with width 'pool_stride_'.
* The pooling is done over 3rd axis, of the set of 2d matrices.
* For instance, a minibatch of 512 frames is propagated by a convolutional
* layer, resulting in a 512 x 3840 input matrix for MaxpoolingComponent,
* which is composed of 128 feature maps for each frame (128 x 30). If you want
* a 3-to-1 maxpooling on each feature map, set 'pool_stride_' and 'pool_size_'
* as 128 and 3 respectively. Maxpooling component would create an output
* matrix of 512 x 1280. The 30 input neurons are grouped by a group size of 3, and
* the maximum in a group is selected, creating a smaller feature map of 10.
*
* Our pooling does not support overlaps, which simplifies the
* implementation (and was not helpful for Ossama).
*/
...
...
@@ -1667,7 +1677,7 @@ class AdditiveNoiseComponent: public RandomComponent {
};
/**
* ConvolutionComponent implements convolution over frequency axis.
* Convolution
al1d
Component implements convolution over frequency axis.
* We assume the input features are spliced, i.e. each frame is in
* fact a set of stacked frames, where we can form patches which span
* over several frequency bands and whole time axis. A patch is the
...
...
@@ -1676,7 +1686,10 @@ class AdditiveNoiseComponent: public RandomComponent {
*
* The convolution is done over whole axis with same filter
* coefficients, i.e. we don't use separate filters for different
* 'regions' of frequency axis.
* 'regions' of frequency axis. Due to convolution, same weights are
* used repeatedly, the final gradient is a sum of all
* position-specific gradients (the sum was found better than
* averaging).
*
* In order to have a fast implementations, the filters are
* represented in vectorized form, where each rectangular filter
...
...
@@ -1690,21 +1703,34 @@ class AdditiveNoiseComponent: public RandomComponent {
* patch_step_ ... size of shift in the convolution
* patch_stride_ ... shift for 2nd dim of a patch
* (i.e. frame length before splicing)
*
* Due to convolution same weights are used repeateadly,
* the final gradient is a sum of all position-specific
* gradients (the sum was found better than averaging).
* For instance, for a convolutional component after raw input,
* if the input is 36-dim fbank feature with delta of order 2
* and spliced using +/- 5 frames of contexts, the convolutional
* component takes the input as a 36 x 33 image. The patch_stride_
* should be configured 36. If patch_step_ and patch_dim_ are
* configured 1 and 7, the Convolutional1dComponent creates a
* 2D filter of 7 x 33, such that the convolution is actually done
* only along the frequency axis. Specifically, the convolutional
* output along the frequency axis is (36 - 7) / 1 + 1 = 30, and
* the convolutional output along the temporal axis is 33 - 33 + 1 = 1,
* resulting in an output image of 30 x 1, which is called a feature map
* in ConvNet. Then if the output-dim is set 3840, the constructor
* would know there should be 3840 / 30 = 128 distinct filters,
* which will create 128 feature maps of 30 x 1 for one frame of
* input. The feature maps are vectorized as a 3840-dim row vector
* in the output matrix of this component. For details on propagation
* of Convolutional1dComponent, check the function definition.
*
*/
class
ConvolutionComponent
:
public
UpdatableComponent
{
class
Convolution
al1d
Component
:
public
UpdatableComponent
{
public:
ConvolutionComponent
();
Convolution
al1d
Component
();
// constructor using another component
ConvolutionComponent
(
const
ConvolutionComponent
&
component
);
Convolution
al1d
Component
(
const
Convolution
al1d
Component
&
component
);
// constructor using parameters
ConvolutionComponent
(
const
CuMatrixBase
<
BaseFloat
>
&
filter_params
,
const
CuVectorBase
<
BaseFloat
>
&
bias_params
,
BaseFloat
learning_rate
);
Convolution
al1d
Component
(
const
CuMatrixBase
<
BaseFloat
>
&
filter_params
,
const
CuVectorBase
<
BaseFloat
>
&
bias_params
,
BaseFloat
learning_rate
);
int32
InputDim
()
const
;
int32
OutputDim
()
const
;
...
...
@@ -1718,7 +1744,7 @@ class ConvolutionComponent: public UpdatableComponent {
void
Resize
(
int32
input_dim
,
int32
output_dim
);
std
::
string
Info
()
const
;
void
InitFromString
(
std
::
string
args
);
std
::
string
Type
()
const
{
return
"ConvolutionComponent"
;
}
std
::
string
Type
()
const
{
return
"Convolution
al1d
Component"
;
}
bool
BackpropNeedsInput
()
const
{
return
false
;
}
bool
BackpropNeedsOutput
()
const
{
return
false
;
}
using
Component
::
Propagate
;
// to avoid name hiding
...
...
@@ -1754,7 +1780,7 @@ class ConvolutionComponent: public UpdatableComponent {
int32
patch_step_
;
int32
patch_stride_
;
const
ConvolutionComponent
&
operator
=
(
const
ConvolutionComponent
&
other
);
// Disallow.
const
Convolution
al1d
Component
&
operator
=
(
const
Convolution
al1d
Component
&
other
);
// Disallow.
CuMatrix
<
BaseFloat
>
filter_params_
;
CuVector
<
BaseFloat
>
bias_params_
;
bool
is_gradient_
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment