Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Open sidebar
Abdelwahab HEBA
kaldi_2015
Commits
d773ab9e
Commit
d773ab9e
authored
Aug 03, 2015
by
naxingyu
Browse files
add Convolution component in nnet2
parent
f8b9515a
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
543 additions
and
0 deletions
+543
-0
src/nnet2/nnet-component-test.cc
src/nnet2/nnet-component-test.cc
+39
-0
src/nnet2/nnet-component.cc
src/nnet2/nnet-component.cc
+440
-0
src/nnet2/nnet-component.h
src/nnet2/nnet-component.h
+64
-0
No files found.
src/nnet2/nnet-component-test.cc
View file @
d773ab9e
...
...
@@ -337,6 +337,44 @@ void UnitTestAffineComponent() {
}
}
// Unit test for ConvolutionComponent.  Exercises all three initialization
// paths -- Init() from hyper-parameters, Init() from a parameter-matrix file,
// and InitFromString() from a config line -- and checks gradients via
// UnitTestGenericComponentInternal().
void UnitTestConvolutionComponent() {
  BaseFloat learning_rate = 0.01,
      param_stddev = 0.1, bias_stddev = 1.0;
  // Patch geometry: patches of width patch_dim are taken every patch_step
  // columns within each patch_stride-wide spliced block.
  int32 patch_stride = 10, patch_step = 1, patch_dim = 4;
  int32 num_patches = 1 + (patch_stride - patch_dim) / patch_step;
  // Randomize splice count and filter count to vary the tested dims.
  int32 num_splice = 5 + Rand() % 10, num_filters = 5 + Rand() % 10;
  int32 input_dim = patch_stride * num_splice;
  int32 filter_dim = patch_dim * num_splice;
  int32 output_dim = num_patches * num_filters;
  {
    ConvolutionComponent component;
    if (Rand() % 2 == 0) {
      // Path 1: initialize directly from hyper-parameters.
      component.Init(learning_rate, input_dim, output_dim,
                     patch_dim, patch_step, patch_stride,
                     param_stddev, bias_stddev);
    } else {
      // Path 2: first initialize the hyper-parameters, then re-initialize
      // the parameters from a matrix written to a temporary file "tmpf".
      component.Init(learning_rate, input_dim, output_dim,
                     patch_dim, patch_step, patch_stride,
                     param_stddev, bias_stddev);
      // One extra column holds the bias term, hence filter_dim + 1.
      Matrix<BaseFloat> mat(num_filters, filter_dim + 1);
      mat.SetRandn();
      mat.Scale(param_stddev);
      WriteKaldiObject(mat, "tmpf", true);
      Sleep(0.5);  // give the filesystem time before re-reading the file
      component.Init(learning_rate, "tmpf");
      unlink("tmpf");  // clean up the temporary file
    }
    UnitTestGenericComponentInternal(component);
  }
  {
    // Path 3: initialize from a configuration string.
    const char *str = "learning-rate=0.01 input-dim=100 output-dim=70 param-stddev=0.1 patch-dim=4 patch-step=1 patch-stride=10";
    ConvolutionComponent component;
    component.InitFromString(str);
    UnitTestGenericComponentInternal(component);
  }
}
void
UnitTestDropoutComponent
()
{
// We're testing that the gradients are computed correctly:
// the input gradients and the model gradients.
...
...
@@ -826,6 +864,7 @@ int main() {
UnitTestFixedBiasComponent
();
UnitTestAffineComponentPreconditioned
();
UnitTestAffineComponentPreconditionedOnline
();
UnitTestConvolutionComponent
();
UnitTestDropoutComponent
();
UnitTestAdditiveNoiseComponent
();
UnitTestParsing
();
...
...
src/nnet2/nnet-component.cc
View file @
d773ab9e
...
...
@@ -102,6 +102,8 @@ Component* Component::NewComponentOfType(const std::string &component_type) {
ans
=
new
DropoutComponent
();
}
else
if
(
component_type
==
"AdditiveNoiseComponent"
)
{
ans
=
new
AdditiveNoiseComponent
();
}
else
if
(
component_type
==
"ConvolutionComponent"
)
{
ans
=
new
ConvolutionComponent
();
}
return
ans
;
}
...
...
@@ -3672,5 +3674,443 @@ void AdditiveNoiseComponent::Propagate(const ChunkInfo &in_info,
out
->
AddMat
(
stddev_
,
rand
);
}
// Default constructor: zeroes the patch geometry and marks the component as
// not-a-gradient.  The component must be configured via Init(), Read() or
// InitFromString() before use.
ConvolutionComponent::ConvolutionComponent():
    UpdatableComponent(),
    patch_dim_(0),
    patch_step_(0),
    patch_stride_(0),
    is_gradient_(false) {}
// Copy constructor.
// Fix: the original did not copy patch_dim_, patch_step_ and patch_stride_,
// leaving the patch geometry of the copy indeterminate (Copy() copies them
// explicitly, but this constructor forgot to).  All members are copied now.
ConvolutionComponent::ConvolutionComponent(
    const ConvolutionComponent &component):
    UpdatableComponent(component),
    patch_dim_(component.patch_dim_),
    patch_step_(component.patch_step_),
    patch_stride_(component.patch_stride_),
    filter_params_(component.filter_params_),
    bias_params_(component.bias_params_),
    is_gradient_(component.is_gradient_) {}
// Constructor from explicit parameters.
// Fix: the original left patch_dim_, patch_step_ and patch_stride_
// uninitialized, so InputDim()/OutputDim() would divide by an indeterminate
// value.  They are now zero-initialized; the caller must still configure the
// patch geometry (e.g. via Read()) before propagating.
ConvolutionComponent::ConvolutionComponent(
    const CuMatrixBase<BaseFloat> &filter_params,
    const CuVectorBase<BaseFloat> &bias_params,
    BaseFloat learning_rate):
    UpdatableComponent(learning_rate),
    patch_dim_(0),
    patch_step_(0),
    patch_stride_(0),
    filter_params_(filter_params),
    bias_params_(bias_params) {
  // One bias term per filter; an empty parameter set is not allowed.
  KALDI_ASSERT(filter_params.NumRows() == bias_params.Dim() &&
               bias_params.Dim() != 0);
  is_gradient_ = false;
}
// Acquire the input dimension.
// Each filter row spans patch_dim_ columns in each spliced copy of the input,
// so the splice count is filter-cols / patch_dim_, and the input covers
// patch_stride_ columns per splice.
int32 ConvolutionComponent::InputDim() const {
  const int32 splice_count = filter_params_.NumCols() / patch_dim_;
  return patch_stride_ * splice_count;
}
// Acquire the output dimension: one output value per (patch, filter) pair.
int32 ConvolutionComponent::OutputDim() const {
  const int32 patches_per_frame =
      1 + (patch_stride_ - patch_dim_) / patch_step_;
  return filter_params_.NumRows() * patches_per_frame;
}
// Initialize the component from hyper-parameters: derives the filter matrix
// and bias vector sizes from the patch geometry, then fills them with
// Gaussian noise scaled by param_stddev / bias_stddev.
void ConvolutionComponent::Init(
    BaseFloat learning_rate, int32 input_dim, int32 output_dim,
    int32 patch_dim, int32 patch_step, int32 patch_stride,
    BaseFloat param_stddev, BaseFloat bias_stddev) {
  UpdatableComponent::Init(learning_rate);
  patch_dim_ = patch_dim;
  patch_step_ = patch_step;
  patch_stride_ = patch_stride;
  // Derived dimensions; the asserts below check the integer divisions
  // were exact.
  int32 num_splice = input_dim / patch_stride;
  int32 filter_dim = num_splice * patch_dim;
  int32 num_patches = 1 + (patch_stride - patch_dim) / patch_step;
  int32 num_filters = output_dim / num_patches;
  KALDI_ASSERT(input_dim % patch_stride == 0);
  KALDI_ASSERT((patch_stride - patch_dim) % patch_step == 0);
  KALDI_ASSERT(output_dim % num_patches == 0);
  filter_params_.Resize(num_filters, filter_dim);
  bias_params_.Resize(num_filters);
  KALDI_ASSERT(param_stddev >= 0.0 && bias_stddev >= 0.0);
  // Random initialization: N(0, stddev^2) for both filters and biases.
  filter_params_.SetRandn();
  filter_params_.Scale(param_stddev);
  bias_params_.SetRandn();
  bias_params_.Scale(bias_stddev);
}
// Initialize the component's parameters from a predefined matrix file.
// The matrix layout is one row per filter: [ filter weights | bias ],
// i.e. the last column holds the bias terms.
// NOTE(review): this path sets only the learning rate and the parameters;
// patch_dim_/patch_step_/patch_stride_ keep their previous values, so the
// patch geometry must already have been configured (e.g. by a prior Init()) --
// confirm against callers.
void ConvolutionComponent::Init(BaseFloat learning_rate,
                                std::string matrix_filename) {
  UpdatableComponent::Init(learning_rate);
  CuMatrix<BaseFloat> mat;
  ReadKaldiObject(matrix_filename, &mat);
  // At least one weight column plus the bias column.
  KALDI_ASSERT(mat.NumCols() >= 2);
  int32 filter_dim = mat.NumCols() - 1, num_filters = mat.NumRows();
  filter_params_.Resize(num_filters, filter_dim);
  bias_params_.Resize(num_filters);
  // Split the matrix: leading columns are the filters, last column the bias.
  filter_params_.CopyFromMat(mat.Range(0, num_filters, 0, filter_dim));
  bias_params_.CopyColFromMat(mat, filter_dim);
}
// Resize the component, setting the parameters to zero, while
// leaving any other configuration values (patch geometry, learning rate)
// the same.  CuMatrix/CuVector::Resize zero the contents by default.
void ConvolutionComponent::Resize(int32 input_dim, int32 output_dim) {
  KALDI_ASSERT(input_dim > 0 && output_dim > 0);
  // Derived dimensions from the existing patch geometry.
  int32 num_splice = input_dim / patch_stride_;
  int32 filter_dim = num_splice * patch_dim_;
  int32 num_patches = 1 + (patch_stride_ - patch_dim_) / patch_step_;
  int32 num_filters = output_dim / num_patches;
  // The requested dims must be consistent with the patch geometry.
  KALDI_ASSERT(input_dim % patch_stride_ == 0);
  KALDI_ASSERT((patch_stride_ - patch_dim_) % patch_step_ == 0);
  KALDI_ASSERT(output_dim % num_patches == 0);
  filter_params_.Resize(num_filters, filter_dim);
  bias_params_.Resize(num_filters);
}
// Display information about the component: dimensions, patch geometry,
// and the empirical standard deviations of the parameters.
std::string ConvolutionComponent::Info() const {
  std::stringstream stream;
  // Empirical stddevs computed as sqrt(mean of squared elements).
  BaseFloat filter_params_size =
      static_cast<BaseFloat>(filter_params_.NumRows())
      * static_cast<BaseFloat>(filter_params_.NumCols());
  BaseFloat filter_stddev =
      std::sqrt(TraceMatMat(filter_params_, filter_params_, kTrans) /
                filter_params_size),
      bias_stddev = std::sqrt(VecVec(bias_params_, bias_params_) /
                              bias_params_.Dim());
  // Derived dims, recomputed the same way as in Init()/Propagate().
  int32 num_splice = InputDim() / patch_stride_;
  int32 filter_dim = num_splice * patch_dim_;
  int32 num_patches = 1 + (patch_stride_ - patch_dim_) / patch_step_;
  int32 num_filters = OutputDim() / num_patches;
  stream << Type() << ", input-dim=" << InputDim()
         << ", output-dim=" << OutputDim()
         << ", num-splice=" << num_splice
         << ", num-patches=" << num_patches
         << ", num-filters=" << num_filters
         << ", filter-dim=" << filter_dim
         << ", filter-params-stddev=" << filter_stddev
         << ", bias-params-stddev=" << bias_stddev
         << ", learning-rate=" << LearningRate();
  return stream.str();
}
// Initialize the component from a configuration string, e.g.
//   "learning-rate=0.01 input-dim=100 output-dim=70 patch-dim=4 ..."
// Either "matrix=<filename>" (parameters from file) or the full set of
// dims and patch geometry must be supplied.
void ConvolutionComponent::InitFromString(std::string args) {
  std::string orig_args(args);  // kept for the error message below
  bool ok = true;
  BaseFloat learning_rate = learning_rate_;  // default: keep current rate
  std::string matrix_filename;
  int32 input_dim = -1, output_dim = -1;
  int32 patch_dim = -1, patch_step = -1, patch_stride = -1;
  ParseFromString("learning-rate", &args, &learning_rate);  // optional
  if (ParseFromString("matrix", &args, &matrix_filename)) {
    // initialize from predefined parameter matrix
    // NOTE(review): patch-dim/patch-step/patch-stride are not parsed on this
    // branch, yet InputDim()/OutputDim() below divide by them -- confirm this
    // path is only used once the patch geometry has been configured.
    Init(learning_rate, matrix_filename);
    // input-dim / output-dim are optional cross-checks against the matrix.
    if (ParseFromString("input-dim", &args, &input_dim))
      KALDI_ASSERT(input_dim == InputDim() &&
                   "input-dim mismatch vs. matrix.");
    if (ParseFromString("output-dim", &args, &output_dim))
      KALDI_ASSERT(output_dim == OutputDim() &&
                   "output-dim mismatch vs. matrix.");
  } else {
    // initialize from configuration: all of these are required.
    ok = ok && ParseFromString("input-dim", &args, &input_dim);
    ok = ok && ParseFromString("output-dim", &args, &output_dim);
    ok = ok && ParseFromString("patch-dim", &args, &patch_dim);
    ok = ok && ParseFromString("patch-step", &args, &patch_step);
    ok = ok && ParseFromString("patch-stride", &args, &patch_stride);
    // Default initialization scale: 1/sqrt(fan-in), overridable below.
    BaseFloat param_stddev = 1.0 / std::sqrt(input_dim),
        bias_stddev = 1.0;
    ParseFromString("param-stddev", &args, &param_stddev);
    ParseFromString("bias-stddev", &args, &bias_stddev);
    Init(learning_rate, input_dim, output_dim,
         patch_dim, patch_step, patch_stride, param_stddev, bias_stddev);
  }
  if (!args.empty())
    KALDI_ERR << "Could not process these elements in initializer: " << args;
  if (!ok)
    KALDI_ERR << "Bad initializer " << orig_args;
}
// propagation function
void
ConvolutionComponent
::
Propagate
(
const
ChunkInfo
&
in_info
,
const
ChunkInfo
&
out_info
,
const
CuMatrixBase
<
BaseFloat
>
&
in
,
CuMatrixBase
<
BaseFloat
>
*
out
)
const
{
in_info
.
CheckSize
(
in
);
out_info
.
CheckSize
(
*
out
);
KALDI_ASSERT
(
in_info
.
NumChunks
()
==
out_info
.
NumChunks
());
// dims
int32
num_splice
=
InputDim
()
/
patch_stride_
;
int32
num_patches
=
1
+
(
patch_stride_
-
patch_dim_
)
/
patch_step_
;
int32
num_filters
=
filter_params_
.
NumRows
();
int32
num_frames
=
in
.
NumRows
();
int32
filter_dim
=
filter_params_
.
NumCols
();
/** Buffer of reshaped inputs:
* 1row = vectorized rectangular feature patch,
* 1col = dim over speech frames,
* std::vector-dim = patch-position
*/
std
::
vector
<
CuMatrix
<
BaseFloat
>
>
vectorized_feature_patches_
;
// prepare the buffers
if
(
vectorized_feature_patches_
.
size
()
==
0
)
{
vectorized_feature_patches_
.
resize
(
num_patches
);
}
// vectorize the inputs
for
(
int32
p
=
0
;
p
<
num_patches
;
p
++
)
{
vectorized_feature_patches_
[
p
].
Resize
(
num_frames
,
filter_dim
,
kSetZero
);
// build-up a column selection mask:
std
::
vector
<
int32
>
column_mask
;
for
(
int32
s
=
0
;
s
<
num_splice
;
s
++
)
{
for
(
int32
d
=
0
;
d
<
patch_dim_
;
d
++
)
{
column_mask
.
push_back
(
p
*
patch_step_
+
s
*
patch_stride_
+
d
);
}
}
KALDI_ASSERT
(
column_mask
.
size
()
==
filter_dim
);
// select the columns
vectorized_feature_patches_
[
p
].
CopyCols
(
in
,
column_mask
);
}
// compute filter activations
for
(
int32
p
=
0
;
p
<
num_patches
;
p
++
)
{
CuSubMatrix
<
BaseFloat
>
tgt
(
out
->
ColRange
(
p
*
num_filters
,
num_filters
));
tgt
.
AddVecToRows
(
1.0
,
bias_params_
,
0.0
);
// add bias
// apply all filters
tgt
.
AddMatMat
(
1.0
,
vectorized_feature_patches_
[
p
],
kNoTrans
,
filter_params_
,
kTrans
,
1.0
);
}
}
// Scale all the parameters (filters and biases) by the same factor;
// used e.g. for parameter averaging and gradient scaling.
void ConvolutionComponent::Scale(BaseFloat scale) {
  filter_params_.Scale(scale);
  bias_params_.Scale(scale);
}
// Add alpha times another ConvolutionComponent's parameters to this one's.
// Aborts if `other_in` is not actually a ConvolutionComponent.
void ConvolutionComponent::Add(BaseFloat alpha,
                               const UpdatableComponent &other_in) {
  const ConvolutionComponent *src =
      dynamic_cast<const ConvolutionComponent*>(&other_in);
  KALDI_ASSERT(src != NULL);  // component types must match
  filter_params_.AddMat(alpha, src->filter_params_);
  bias_params_.AddVec(alpha, src->bias_params_);
}
// Back propagation function.
// Step 1: for each patch position, multiply the corresponding slice of
// out_deriv by the filters to get derivatives w.r.t. the vectorized patches.
// Step 2: scatter-add those back into in_deriv at the columns each patch was
// gathered from (overlapping patches sum, the inverse of CopyCols in
// Propagate).  Step 3: if requested, update the model.
void ConvolutionComponent::Backprop(const ChunkInfo &in_info,
                                    const ChunkInfo &out_info,
                                    const CuMatrixBase<BaseFloat> &in_value,
                                    const CuMatrixBase<BaseFloat> &out_value,
                                    const CuMatrixBase<BaseFloat> &out_deriv,
                                    Component *to_update_in,
                                    CuMatrix<BaseFloat> *in_deriv) const {
  in_deriv->Resize(in_value.NumRows(), in_value.NumCols(), kSetZero);
  // NULL when no model update is wanted (e.g. pure gradient computation).
  ConvolutionComponent *to_update =
      dynamic_cast<ConvolutionComponent*>(to_update_in);
  int32 num_splice = InputDim() / patch_stride_;
  int32 num_patches = 1 + (patch_stride_ - patch_dim_) / patch_step_;
  int32 num_filters = filter_params_.NumRows();
  int32 num_frames = in_value.NumRows();
  int32 filter_dim = filter_params_.NumCols();

  /** Buffer for backpropagation:
   * derivatives in the domain of 'vectorized_feature_patches_',
   * 1row = vectorized rectangular feature patch,
   * 1col = dim over speech frames,
   * std::vector-dim = patch-position
   */
  std::vector<CuMatrix<BaseFloat> > feature_patch_diffs_;
  feature_patch_diffs_.resize(num_patches);

  // backpropagate to vector of matrices
  // (corresponding to position of a filter)
  for (int32 p = 0; p < num_patches; p++) {
    feature_patch_diffs_[p].Resize(num_frames, filter_dim, kSetZero);  // reset
    CuSubMatrix<BaseFloat> out_deriv_patch(
        out_deriv.ColRange(p * num_filters, num_filters));
    feature_patch_diffs_[p].AddMatMat(1.0, out_deriv_patch, kNoTrans,
                                      filter_params_, kNoTrans, 0.0);
  }
  // sum the derivatives into in_deriv, we will compensate #summands
  for (int32 p = 0; p < num_patches; p++) {
    for (int32 s = 0; s < num_splice; s++) {
      CuSubMatrix<BaseFloat> src(
          feature_patch_diffs_[p].ColRange(s * patch_dim_, patch_dim_));
      CuSubMatrix<BaseFloat> tgt(
          in_deriv->ColRange(p * patch_step_ + s * patch_stride_, patch_dim_));
      tgt.AddMat(1.0, src);  // sum
    }
  }
  if (to_update != NULL) {
    // Next update the model (must do this 2nd so the derivatives we propagate
    // are accurate, in case this == to_update_in.)
    to_update->Update(in_value, out_deriv);
  }
}
// Zero the parameters; when treat_as_gradient is true, also configure the
// component so it can be used as a gradient accumulator (unit learning rate,
// is_gradient_ flag set).
void ConvolutionComponent::SetZero(bool treat_as_gradient) {
  if (treat_as_gradient) {
    SetLearningRate(1.0);
    is_gradient_ = true;
  }
  filter_params_.SetZero();
  bias_params_.SetZero();
}
// Read the component from a stream; the token sequence mirrors Write().
// Accepts models both with and without the trailing <IsGradient> field
// (for backward compatibility with older files).
void ConvolutionComponent::Read(std::istream &is, bool binary) {
  std::ostringstream ostr_beg, ostr_end;
  ostr_beg << "<" << Type() << ">";  // e.g. "<ConvolutionComponent>"
  ostr_end << "</" << Type() << ">";  // e.g. "</ConvolutionComponent>"
  // might not see the "<ConvolutionComponent>" part because
  // of how ReadNew() works.
  ExpectOneOrTwoTokens(is, binary, ostr_beg.str(), "<LearningRate>");
  ReadBasicType(is, binary, &learning_rate_);
  ExpectOneOrTwoTokens(is, binary, ostr_beg.str(), "<PatchDim>");
  ReadBasicType(is, binary, &patch_dim_);
  ExpectOneOrTwoTokens(is, binary, ostr_beg.str(), "<PatchStep>");
  ReadBasicType(is, binary, &patch_step_);
  ExpectOneOrTwoTokens(is, binary, ostr_beg.str(), "<PatchStride>");
  ReadBasicType(is, binary, &patch_stride_);
  ExpectToken(is, binary, "<FilterParams>");
  filter_params_.Read(is, binary);
  ExpectToken(is, binary, "<BiasParams>");
  bias_params_.Read(is, binary);
  std::string tok;
  ReadToken(is, binary, &tok);
  if (tok == "<IsGradient>") {
    ReadBasicType(is, binary, &is_gradient_);
    ExpectToken(is, binary, ostr_end.str());
  } else {
    // older file without <IsGradient>: default to false, and the token we
    // already consumed must be the closing tag.
    is_gradient_ = false;
    KALDI_ASSERT(tok == ostr_end.str());
  }
}
// Write the component to a stream; the token sequence mirrors Read().
void ConvolutionComponent::Write(std::ostream &os, bool binary) const {
  std::ostringstream ostr_beg, ostr_end;
  ostr_beg << "<" << Type() << ">";  // e.g. "<ConvolutionComponent>"
  ostr_end << "</" << Type() << ">";  // e.g. "</ConvolutionComponent>"
  WriteToken(os, binary, ostr_beg.str());
  WriteToken(os, binary, "<LearningRate>");
  WriteBasicType(os, binary, learning_rate_);
  WriteToken(os, binary, "<PatchDim>");
  WriteBasicType(os, binary, patch_dim_);
  WriteToken(os, binary, "<PatchStep>");
  WriteBasicType(os, binary, patch_step_);
  WriteToken(os, binary, "<PatchStride>");
  WriteBasicType(os, binary, patch_stride_);
  WriteToken(os, binary, "<FilterParams>");
  filter_params_.Write(os, binary);
  WriteToken(os, binary, "<BiasParams>");
  bias_params_.Write(os, binary);
  WriteToken(os, binary, "<IsGradient>");
  WriteBasicType(os, binary, is_gradient_);
  WriteToken(os, binary, ostr_end.str());
}
// Dot product of this component's parameters with another's (filters plus
// biases); used e.g. for gradient diagnostics.
// Fix: assert the dynamic_cast succeeded before dereferencing -- the original
// would dereference NULL if `other_in` were a different component type
// (the sibling Add() already had this check).
BaseFloat ConvolutionComponent::DotProduct(
    const UpdatableComponent &other_in) const {
  const ConvolutionComponent *other =
      dynamic_cast<const ConvolutionComponent*>(&other_in);
  KALDI_ASSERT(other != NULL);
  return TraceMatMat(filter_params_, other->filter_params_, kTrans)
      + VecVec(bias_params_, other->bias_params_);
}
// Deep copy of the component; the caller owns the returned pointer.
Component* ConvolutionComponent::Copy() const {
  ConvolutionComponent *copy = new ConvolutionComponent();
  // Copy configuration first, then the parameters.
  copy->learning_rate_ = learning_rate_;
  copy->patch_dim_ = patch_dim_;
  copy->patch_step_ = patch_step_;
  copy->patch_stride_ = patch_stride_;
  copy->is_gradient_ = is_gradient_;
  copy->filter_params_ = filter_params_;
  copy->bias_params_ = bias_params_;
  return copy;
}
// Add Gaussian noise with the given standard deviation to all parameters;
// used by gradient-checking tests.
void ConvolutionComponent::PerturbParams(BaseFloat stddev) {
  // Noise buffers sized like the parameters (contents fully overwritten
  // by SetRandn, so no need to copy the parameter values in).
  CuMatrix<BaseFloat> filter_noise(filter_params_.NumRows(),
                                   filter_params_.NumCols());
  filter_noise.SetRandn();
  filter_params_.AddMat(stddev, filter_noise);

  CuVector<BaseFloat> bias_noise(bias_params_.Dim());
  bias_noise.SetRandn();
  bias_params_.AddVec(stddev, bias_noise);
}
// Set the component's parameters from the given bias vector and filter
// matrix (copies from CPU to the CuVector/CuMatrix members).
void ConvolutionComponent::SetParams(const VectorBase<BaseFloat> &bias,
                                     const MatrixBase<BaseFloat> &filter) {
  bias_params_ = bias;
  filter_params_ = filter;
  // One bias term per filter row.
  KALDI_ASSERT(bias_params_.Dim() == filter_params_.NumRows());
}
// Total number of trainable parameters: each filter row contributes its
// weights plus one bias term.
int32 ConvolutionComponent::GetParameterDim() const {
  const int32 num_filters = filter_params_.NumRows();
  const int32 filter_dim = filter_params_.NumCols();
  return num_filters * (filter_dim + 1);
}
// update parameters
void
ConvolutionComponent
::
Update
(
const
CuMatrixBase
<
BaseFloat
>
&
in_value
,
const
CuMatrixBase
<
BaseFloat
>
&
out_deriv
)
{
// useful dims
int32
num_patches
=
1
+
(
patch_stride_
-
patch_dim_
)
/
patch_step_
;
int32
num_filters
=
filter_params_
.
NumRows
();
int32
filter_dim
=
filter_params_
.
NumCols
();
int32
num_frames
=
in_value
.
NumRows
();
int32
num_splice
=
InputDim
()
/
patch_stride_
;
CuMatrix
<
BaseFloat
>
filters_grad
;
CuVector
<
BaseFloat
>
bias_grad
;
/** Buffer of reshaped inputs:
* 1row = vectorized rectangular feature patch,
* 1col = dim over speech frames,
* std::vector-dim = patch-position
*/
std
::
vector
<
CuMatrix
<
BaseFloat
>
>
vectorized_feature_patches_
;
// prepare the buffers
if
(
vectorized_feature_patches_
.
size
()
==
0
)
{
vectorized_feature_patches_
.
resize
(
num_patches
);
}
// vectorize the inputs
for
(
int32
p
=
0
;
p
<
num_patches
;
p
++
)
{
vectorized_feature_patches_
[
p
].
Resize
(
num_frames
,
filter_dim
,
kSetZero
);
// build-up a column selection mask:
std
::
vector
<
int32
>
column_mask
;
for
(
int32
s
=
0
;
s
<
num_splice
;
s
++
)
{
for
(
int32
d
=
0
;
d
<
patch_dim_
;
d
++
)
{
column_mask
.
push_back
(
p
*
patch_step_
+
s
*
patch_stride_
+
d
);
}
}
KALDI_ASSERT
(
column_mask
.
size
()
==
filter_dim
);
// select the columns
vectorized_feature_patches_
[
p
].
CopyCols
(
in_value
,
column_mask
);
}
//
// calculate the gradient
//
filters_grad
.
Resize
(
num_filters
,
filter_dim
,
kSetZero
);
// reset
bias_grad
.
Resize
(
num_filters
,
kSetZero
);
// reset
// use all the patches
for
(
int32
p
=
0
;
p
<
num_patches
;
p
++
)
{
// sum
CuSubMatrix
<
BaseFloat
>
diff_patch
(
out_deriv
.
ColRange
(
p
*
num_filters
,
num_filters
));
filters_grad
.
AddMatMat
(
1.0
,
diff_patch
,
kTrans
,
vectorized_feature_patches_
[
p
],
kNoTrans
,
1.0
);
bias_grad
.
AddRowSumMat
(
1.0
,
diff_patch
,
1.0
);
}
//
// update
//
filter_params_
.
AddMat
(
learning_rate_
,
filters_grad
);
bias_params_
.
AddVec
(
learning_rate_
,
bias_grad
);
}
}
// namespace nnet2
}
// namespace kaldi
src/nnet2/nnet-component.h
View file @
d773ab9e
...
...
@@ -1613,6 +1613,70 @@ class AdditiveNoiseComponent: public RandomComponent {
BaseFloat
stddev_
;
};
/**
 * ConvolutionComponent: a 1-D convolutional layer for nnet2.
 * The input of each frame is assumed to consist of num-splice concatenated
 * blocks of patch-stride values; within each block, patches of width
 * patch-dim are taken every patch-step values, and every filter is applied
 * at every patch position, producing num-patches * num-filters outputs.
 */
class ConvolutionComponent: public UpdatableComponent {
 public:
  ConvolutionComponent();
  // constructor using another component
  ConvolutionComponent(const ConvolutionComponent &component);
  // constructor using parameters
  ConvolutionComponent(const CuMatrixBase<BaseFloat> &filter_params,
                       const CuVectorBase<BaseFloat> &bias_params,
                       BaseFloat learning_rate);
  int32 InputDim() const;
  int32 OutputDim() const;
  // initialize from hyper-parameters (random parameter values)
  void Init(BaseFloat learning_rate,
            int32 input_dim, int32 output_dim,
            int32 patch_dim, int32 patch_step, int32 patch_stride,
            BaseFloat param_stddev, BaseFloat bias_stddev);
  // initialize the parameters from a matrix file
  // (rows = filters, last column = bias)
  void Init(BaseFloat learning_rate,
            std::string matrix_filename);
  // resize the component, setting the parameters to zero, while
  // leaving any other configuration values the same
  void Resize(int32 input_dim, int32 output_dim);
  std::string Info() const;
  void InitFromString(std::string args);
  std::string Type() const { return "ConvolutionComponent"; }
  // Backprop needs the input (Update uses in_value) but not the output.
  bool BackpropNeedsInput() const { return true; }
  bool BackpropNeedsOutput() const { return false; }
  using Component::Propagate;  // to avoid name hiding
  void Propagate(const ChunkInfo &in_info,
                 const ChunkInfo &out_info,
                 const CuMatrixBase<BaseFloat> &in,
                 CuMatrixBase<BaseFloat> *out) const;
  void Scale(BaseFloat scale);
  virtual void Add(BaseFloat alpha, const UpdatableComponent &other);
  virtual void Backprop(const ChunkInfo &in_info,
                        const ChunkInfo &out_info,
                        const CuMatrixBase<BaseFloat> &in_value,
                        const CuMatrixBase<BaseFloat> &out_value,
                        const CuMatrixBase<BaseFloat> &out_deriv,
                        Component *to_update_in,
                        CuMatrix<BaseFloat> *in_deriv) const;
  void SetZero(bool treat_as_gradient);
  void Read(std::istream &is, bool binary);
  void Write(std::ostream &os, bool binary) const;
  virtual BaseFloat DotProduct(const UpdatableComponent &other) const;
  Component* Copy() const;
  void PerturbParams(BaseFloat stddev);
  void SetParams(const VectorBase<BaseFloat> &bias,
                 const MatrixBase<BaseFloat> &filter);
  // Accessors for the raw parameters (non-owning references).
  const CuVector<BaseFloat> &BiasParams() { return bias_params_; }
  const CuMatrix<BaseFloat> &LinearParams() { return filter_params_; }
  int32 GetParameterDim() const;
  // Simple-gradient-descent update from input values and output derivatives.
  void Update(const CuMatrixBase<BaseFloat> &in_value,
              const CuMatrixBase<BaseFloat> &out_deriv);
 private:
  int32 patch_dim_;     // width of each convolutional patch
  int32 patch_step_;    // shift between successive patches
  int32 patch_stride_;  // size of one spliced block of the input
  const ConvolutionComponent &operator=(
      const ConvolutionComponent &other);  // Disallow.
  CuMatrix<BaseFloat> filter_params_;  // one row per filter
  CuVector<BaseFloat> bias_params_;    // one bias per filter
  bool is_gradient_;  // true when the component stores a gradient
};
/// Functions used in Init routines. Suppose name=="foo", if "string" has a
/// field like foo=12, this function will set "param" to 12 and remove that
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment