Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Open sidebar
Yoann HOUPERT
kaldi_2015
Commits
c3f5a1da
Commit
c3f5a1da
authored
Aug 12, 2015
by
naxingyu
Browse files
efficient convolutional Propagate and Backprop
parent
9d4b994f
Changes
10
Hide whitespace changes
Inline
Side-by-side
Showing
10 changed files
with
207 additions
and
55 deletions
+207
-55
src/cudamatrix/cu-kernels-ansi.h
src/cudamatrix/cu-kernels-ansi.h
+2
-0
src/cudamatrix/cu-kernels.cu
src/cudamatrix/cu-kernels.cu
+27
-0
src/cudamatrix/cu-kernels.h
src/cudamatrix/cu-kernels.h
+6
-0
src/cudamatrix/cu-matrix-test.cc
src/cudamatrix/cu-matrix-test.cc
+31
-0
src/cudamatrix/cu-matrix.cc
src/cudamatrix/cu-matrix.cc
+50
-0
src/cudamatrix/cu-matrix.h
src/cudamatrix/cu-matrix.h
+12
-0
src/matrix/kaldi-matrix.cc
src/matrix/kaldi-matrix.cc
+28
-0
src/matrix/kaldi-matrix.h
src/matrix/kaldi-matrix.h
+8
-0
src/nnet2/nnet-component.cc
src/nnet2/nnet-component.cc
+42
-55
src/nnet2/nnet-component.h
src/nnet2/nnet-component.h
+1
-0
No files found.
src/cudamatrix/cu-kernels-ansi.h
View file @
c3f5a1da
...
...
@@ -62,6 +62,7 @@ void cudaF_apply_pow_abs(dim3 Gr, dim3 Bl, float* mat, float power, bool include
void
cudaF_apply_heaviside
(
dim3
Gr
,
dim3
Bl
,
float
*
mat
,
MatrixDim
d
);
void
cudaF_apply_floor
(
dim3
Gr
,
dim3
Bl
,
float
*
mat
,
float
floor_val
,
MatrixDim
d
);
void
cudaF_copy_cols
(
dim3
Gr
,
dim3
Bl
,
float
*
dst
,
const
float
*
src
,
const
MatrixIndexT_cuda
*
reorder
,
MatrixDim
dst_dim
,
int
src_stride
);
void
cudaF_add_cols
(
dim3
Gr
,
dim3
Bl
,
float
*
dst
,
const
float
*
src
,
const
MatrixIndexT_cuda
*
reorder
,
MatrixDim
dst_dim
,
int
src_stride
);
void
cudaF_copy_rows
(
dim3
Gr
,
dim3
Bl
,
float
*
dst
,
const
float
*
src
,
const
MatrixIndexT_cuda
*
reorder
,
MatrixDim
dst_dim
,
int
src_stride
);
void
cudaF_apply_ceiling
(
dim3
Gr
,
dim3
Bl
,
float
*
mat
,
float
ceiling_val
,
MatrixDim
d
);
void
cudaF_set_diag
(
int
Gr
,
int
Bl
,
float
*
mat
,
float
value
,
MatrixDim
d
);
...
...
@@ -190,6 +191,7 @@ void cudaD_apply_pow_abs(dim3 Gr, dim3 Bl, double* mat, double power, bool inclu
void
cudaD_apply_heaviside
(
dim3
Gr
,
dim3
Bl
,
double
*
mat
,
MatrixDim
d
);
void
cudaD_apply_floor
(
dim3
Gr
,
dim3
Bl
,
double
*
mat
,
double
floor_val
,
MatrixDim
d
);
void
cudaD_copy_cols
(
dim3
Gr
,
dim3
Bl
,
double
*
dst
,
const
double
*
src
,
const
MatrixIndexT_cuda
*
reorder
,
MatrixDim
dst_dim
,
int
src_stride
);
void
cudaD_add_cols
(
dim3
Gr
,
dim3
Bl
,
double
*
dst
,
const
double
*
src
,
const
MatrixIndexT_cuda
*
reorder
,
MatrixDim
dst_dim
,
int
src_stride
);
void
cudaD_copy_rows
(
dim3
Gr
,
dim3
Bl
,
double
*
dst
,
const
double
*
src
,
const
MatrixIndexT_cuda
*
reorder
,
MatrixDim
dst_dim
,
int
src_stride
);
void
cudaD_apply_ceiling
(
dim3
Gr
,
dim3
Bl
,
double
*
mat
,
double
ceiling_val
,
MatrixDim
d
);
void
cudaD_set_diag
(
int
Gr
,
int
Bl
,
double
*
mat
,
double
value
,
MatrixDim
d
);
...
...
src/cudamatrix/cu-kernels.cu
View file @
c3f5a1da
...
...
@@ -1259,6 +1259,25 @@ static void _copy_cols(Real* dst, const Real *src, const MatrixIndexT_cuda* reor
}
}
template<typename Real>
__global__
static void _add_cols(Real* dst, const Real* src,
                      const MatrixIndexT_cuda* reorder,
                      MatrixDim dst_dim, int src_stride) {
  // Note: in this kernel, the x dimension corresponds to rows and the y to
  // columns, as it will be going forward.
  const int row = blockIdx.x * blockDim.x + threadIdx.x;
  const int col = blockIdx.y * blockDim.y + threadIdx.y;
  if (row >= dst_dim.rows || col >= dst_dim.cols)
    return;  // guard the grid tail: grids rarely divide the matrix evenly.
  // reorder maps each destination column to a source column; a negative
  // entry means "no source column", so the destination is left unchanged.
  const int src_col = reorder[col];
  if (src_col >= 0) {
    dst[row * dst_dim.stride + col] += src[row * src_stride + src_col];
  }
}
template
<
typename
Real
>
__global__
static
void
_copy_rows
(
Real
*
dst
,
const
Real
*
src
,
const
MatrixIndexT_cuda
*
reorder
,
MatrixDim
dst_dim
,
int
src_stride
)
{
...
...
@@ -2024,6 +2043,10 @@ void cudaF_copy_cols(dim3 Gr, dim3 Bl, float* dst, const float* src, const Matri
_copy_cols
<<<
Gr
,
Bl
>>>
(
dst
,
src
,
reorder
,
dst_dim
,
src_stride
);
}
// Single-precision host wrapper: launches _add_cols, which does
// dst.col(j) += src.col(reorder[j]) for reorder[j] >= 0 and skips
// columns whose reorder entry is negative.
void cudaF_add_cols(dim3 Gr, dim3 Bl, float* dst, const float* src,
                    const MatrixIndexT_cuda* reorder, MatrixDim dst_dim,
                    int src_stride) {
  _add_cols<<<Gr,Bl>>>(dst, src, reorder, dst_dim, src_stride);
}
void
cudaF_copy_rows
(
dim3
Gr
,
dim3
Bl
,
float
*
dst
,
const
float
*
src
,
const
MatrixIndexT_cuda
*
reorder
,
MatrixDim
dst_dim
,
int
src_stride
)
{
_copy_rows
<<<
Gr
,
Bl
>>>
(
dst
,
src
,
reorder
,
dst_dim
,
src_stride
);
}
...
...
@@ -2445,6 +2468,10 @@ void cudaD_copy_cols(dim3 Gr, dim3 Bl, double* dst, const double* src, const Mat
_copy_cols
<<<
Gr
,
Bl
>>>
(
dst
,
src
,
reorder
,
dst_dim
,
src_stride
);
}
// Double-precision host wrapper: launches _add_cols, which does
// dst.col(j) += src.col(reorder[j]) for reorder[j] >= 0 and skips
// columns whose reorder entry is negative.
void cudaD_add_cols(dim3 Gr, dim3 Bl, double* dst, const double* src,
                    const MatrixIndexT_cuda* reorder, MatrixDim dst_dim,
                    int src_stride) {
  _add_cols<<<Gr,Bl>>>(dst, src, reorder, dst_dim, src_stride);
}
void
cudaD_copy_rows
(
dim3
Gr
,
dim3
Bl
,
double
*
dst
,
const
double
*
src
,
const
MatrixIndexT_cuda
*
reorder
,
MatrixDim
dst_dim
,
int
src_stride
)
{
_copy_rows
<<<
Gr
,
Bl
>>>
(
dst
,
src
,
reorder
,
dst_dim
,
src_stride
);
}
...
...
src/cudamatrix/cu-kernels.h
View file @
c3f5a1da
...
...
@@ -92,6 +92,9 @@ inline void cuda_apply_ceiling(dim3 Gr, dim3 Bl, float* mat, float ceiling_val,
inline
void
cuda_copy_cols
(
dim3
Gr
,
dim3
Bl
,
float
*
dst
,
const
float
*
src
,
const
MatrixIndexT_cuda
*
reorder
,
MatrixDim
dst_dim
,
int
src_stride
)
{
cudaF_copy_cols
(
Gr
,
Bl
,
dst
,
src
,
reorder
,
dst_dim
,
src_stride
);
}
// Float overload of the type-generic dispatcher: forwards to the
// single-precision kernel wrapper cudaF_add_cols.
inline void cuda_add_cols(dim3 Gr, dim3 Bl, float* dst, const float* src,
                          const MatrixIndexT_cuda* reorder, MatrixDim dst_dim,
                          int src_stride) {
  cudaF_add_cols(Gr, Bl, dst, src, reorder, dst_dim, src_stride);
}
inline
void
cuda_copy_rows
(
dim3
Gr
,
dim3
Bl
,
float
*
dst
,
const
float
*
src
,
const
MatrixIndexT_cuda
*
reorder
,
MatrixDim
dst_dim
,
int
src_stride
)
{
cudaF_copy_rows
(
Gr
,
Bl
,
dst
,
src
,
reorder
,
dst_dim
,
src_stride
);
}
...
...
@@ -259,6 +262,9 @@ inline void cuda_apply_ceiling(dim3 Gr, dim3 Bl, double* mat, double ceiling_val
inline
void
cuda_copy_cols
(
dim3
Gr
,
dim3
Bl
,
double
*
dst
,
const
double
*
src
,
const
MatrixIndexT_cuda
*
reorder
,
MatrixDim
dst_dim
,
int
src_stride
)
{
cudaD_copy_cols
(
Gr
,
Bl
,
dst
,
src
,
reorder
,
dst_dim
,
src_stride
);
}
// Double overload of the type-generic dispatcher: forwards to the
// double-precision kernel wrapper cudaD_add_cols.
inline void cuda_add_cols(dim3 Gr, dim3 Bl, double* dst, const double* src,
                          const MatrixIndexT_cuda* reorder, MatrixDim dst_dim,
                          int src_stride) {
  cudaD_add_cols(Gr, Bl, dst, src, reorder, dst_dim, src_stride);
}
inline
void
cuda_copy_rows
(
dim3
Gr
,
dim3
Bl
,
double
*
dst
,
const
double
*
src
,
const
MatrixIndexT_cuda
*
reorder
,
MatrixDim
dst_dim
,
int
src_stride
)
{
cudaD_copy_rows
(
Gr
,
Bl
,
dst
,
src
,
reorder
,
dst_dim
,
src_stride
);
}
...
...
src/cudamatrix/cu-matrix-test.cc
View file @
c3f5a1da
...
...
@@ -509,6 +509,36 @@ static void UnitTestCuMatrixCopyCols() {
}
template
<
typename
Real
>
static
void
UnitTestCuMatrixAddCols
()
{
for
(
MatrixIndexT
p
=
0
;
p
<
2
;
p
++
)
{
MatrixIndexT
num_cols1
=
10
+
Rand
()
%
10
,
num_cols2
=
10
+
Rand
()
%
10
,
num_rows
=
10
+
Rand
()
%
10
;
CuMatrix
<
Real
>
M
(
num_rows
,
num_cols1
);
M
.
SetRandn
();
CuMatrix
<
Real
>
N
(
num_rows
,
num_cols2
),
O
(
num_rows
,
num_cols2
);
std
::
vector
<
int32
>
reorder
(
num_cols2
);
for
(
int32
i
=
0
;
i
<
num_cols2
;
i
++
)
reorder
[
i
]
=
-
1
+
(
Rand
()
%
(
num_cols1
+
1
));
if
(
Rand
()
%
2
==
0
)
{
N
.
AddCols
(
M
,
reorder
);
}
else
{
CuArray
<
int32
>
cuda_reorder
(
reorder
);
N
.
AddCols
(
M
,
cuda_reorder
);
}
for
(
int32
i
=
0
;
i
<
num_rows
;
i
++
)
for
(
int32
j
=
0
;
j
<
num_cols2
;
j
++
)
if
(
reorder
[
j
]
<
0
)
O
(
i
,
j
)
=
0
;
else
O
(
i
,
j
)
=
M
(
i
,
reorder
[
j
]);
AssertEqual
(
N
,
O
);
}
}
template
<
typename
Real
>
static
void
UnitTestCuMatrixApplyFloor
()
{
...
...
@@ -2093,6 +2123,7 @@ template<typename Real> void CudaMatrixUnitTest() {
UnitTestCuMatrixCopyFromTp
<
Real
>
();
UnitTestCuMatrixAddMatTp
<
Real
>
();
UnitTestCuMatrixCopyCols
<
Real
>
();
UnitTestCuMatrixAddCols
<
Real
>
();
UnitTestCuMatrixSumColumnRanges
<
Real
>
();
UnitTestCuMatrixCopyRows
<
Real
>
();
UnitTestCuMatrixCopyRowsFromVec
<
Real
>
();
...
...
src/cudamatrix/cu-matrix.cc
View file @
c3f5a1da
...
...
@@ -1960,6 +1960,56 @@ void CuMatrixBase<Real>::CopyCols(const CuMatrixBase<Real> &src,
}
}
// Adds selected columns of src into the columns of *this:
// this->col(c) += src.col(reorder[c]) for reorder[c] >= 0; an entry of -1
// leaves column c unchanged.  reorder.size() must equal NumCols() and
// src.NumRows() must equal NumRows().
template<typename Real>
void CuMatrixBase<Real>::AddCols(const CuMatrixBase<Real> &src,
                                 const std::vector<MatrixIndexT> &reorder) {
#if HAVE_CUDA == 1
  if (CuDevice::Instantiate().Enabled()) {
    KALDI_ASSERT(static_cast<MatrixIndexT>(reorder.size()) == NumCols());
    KALDI_ASSERT(NumRows() == src.NumRows());
#ifdef KALDI_PARANOID
    // Optional (debug-build) range check of the column map.
    MatrixIndexT src_cols = src.NumCols();
    for (size_t i = 0; i < reorder.size(); i++)
      KALDI_ASSERT(reorder[i] >= -1 && reorder[i] < src_cols);
#endif
    // Copy the host-side index vector to device memory for the kernel.
    CuArray<MatrixIndexT> cuda_reorder(reorder);
    Timer tim;
    dim3 dimBlock(CU2DBLOCK, CU2DBLOCK);
    // This kernel, as it is newer has the (x,y) dims as (rows,cols).
    dim3 dimGrid(n_blocks(NumRows(), CU2DBLOCK),
                 n_blocks(NumCols(), CU2DBLOCK));
    cuda_add_cols(dimGrid, dimBlock, data_, src.Data(), cuda_reorder.Data(),
                  Dim(), src.Stride());
    CU_SAFE_CALL(cudaGetLastError());
    CuDevice::Instantiate().AccuProfile(__func__, tim.Elapsed());
  } else
#endif
  {
    // No GPU enabled: delegate to the CPU MatrixBase implementation.
    Mat().AddCols(src.Mat(), reorder);
  }
}
// Version of AddCols whose column map is already a device-side CuArray.
// this->col(c) += src.col(reorder[c]) for reorder[c] >= 0; an entry of -1
// leaves column c unchanged.
template<typename Real>
void CuMatrixBase<Real>::AddCols(const CuMatrixBase<Real> &src,
                                 const CuArray<MatrixIndexT> &reorder) {
#if HAVE_CUDA == 1
  if (CuDevice::Instantiate().Enabled()) {
    KALDI_ASSERT(reorder.Dim() == NumCols());
    KALDI_ASSERT(NumRows() == src.NumRows());
    Timer tim;
    dim3 dimBlock(CU2DBLOCK, CU2DBLOCK);
    // This kernel, as it is newer has the (x,y) dims as (rows,cols).
    dim3 dimGrid(n_blocks(NumRows(), CU2DBLOCK),
                 n_blocks(NumCols(), CU2DBLOCK));
    cuda_add_cols(dimGrid, dimBlock, data_, src.Data(), reorder.Data(),
                  Dim(), src.Stride());
    CU_SAFE_CALL(cudaGetLastError());
    CuDevice::Instantiate().AccuProfile(__func__, tim.Elapsed());
  } else
#endif
  {
    // No GPU enabled: pull the index map back to the host and delegate
    // to the CPU MatrixBase implementation.
    std::vector<MatrixIndexT> reorder_cpu;
    reorder.CopyToVec(&reorder_cpu);
    Mat().AddCols(src.Mat(), reorder_cpu);
  }
}
template
<
typename
Real
>
void
CuMatrixBase
<
Real
>::
CopyRows
(
const
CuMatrixBase
<
Real
>
&
src
,
...
...
src/cudamatrix/cu-matrix.h
View file @
c3f5a1da
...
...
@@ -98,6 +98,18 @@ class CuMatrixBase {
void
CopyCols
(
const
CuMatrixBase
<
Real
>
&
src
,
const
CuArray
<
MatrixIndexT
>
&
indices
);
/// Add column indices[r] of src to column r.
/// As a special case, if indexes[i] == -1, skip column i
/// indices.size() must equal this->NumCols(),
/// all elements of "reorder" must be in [-1, src.NumCols()-1],
/// and src.NumRows() must equal this.NumRows()
void
AddCols
(
const
CuMatrixBase
<
Real
>
&
src
,
const
std
::
vector
<
MatrixIndexT
>
&
indices
);
/// Version of AddCols that takes CuArray argument.
void
AddCols
(
const
CuMatrixBase
<
Real
>
&
src
,
const
CuArray
<
MatrixIndexT
>
&
indices
);
/// Copies row r from row indices[r] of src.
/// As a special case, if indexes[i] == -1, sets row i to zero
...
...
src/matrix/kaldi-matrix.cc
View file @
c3f5a1da
...
...
@@ -2566,6 +2566,34 @@ void MatrixBase<Real>::CopyCols(const MatrixBase<Real> &src,
}
}
template<typename Real>
void MatrixBase<Real>::AddCols(const MatrixBase<Real> &src,
                               const std::vector<MatrixIndexT> &indices) {
  // Adds src column indices[c] into column c of *this; a negative index
  // means column c is left untouched.
  KALDI_ASSERT(NumRows() == src.NumRows());
  KALDI_ASSERT(NumCols() == static_cast<MatrixIndexT>(indices.size()));
  const MatrixIndexT rows = num_rows_, cols = num_cols_,
      dst_stride = stride_, src_stride = src.stride_;
  Real *dst_row = this->data_;
  const Real *src_row = src.data_;
#ifdef KALDI_PARANOID
  // Optional (debug-build) range check of the column map.
  const MatrixIndexT src_cols = src.NumCols();
  for (size_t i = 0; i < indices.size(); i++)
    KALDI_ASSERT(indices[i] >= -1 && indices[i] < src_cols);
#endif
  const MatrixIndexT *index_data = &(indices[0]);
  // For the sake of memory locality we do this row by row, rather
  // than doing it column-wise using cublas_Xcopy.
  for (MatrixIndexT r = 0; r < rows; r++) {
    for (MatrixIndexT c = 0; c < cols; c++) {
      const MatrixIndexT src_col = index_data[c];
      if (src_col >= 0)
        dst_row[c] += src_row[src_col];
    }
    dst_row += dst_stride;
    src_row += src_stride;
  }
}
template
<
typename
Real
>
void
MatrixBase
<
Real
>::
CopyRows
(
const
MatrixBase
<
Real
>
&
src
,
const
std
::
vector
<
MatrixIndexT
>
&
indices
)
{
...
...
src/matrix/kaldi-matrix.h
View file @
c3f5a1da
...
...
@@ -284,6 +284,14 @@ class MatrixBase {
void
CopyRows
(
const
MatrixBase
<
Real
>
&
src
,
const
std
::
vector
<
MatrixIndexT
>
&
indices
);
/// Add column indices[r] of src to column r.
/// As a special case, if indexes[i] == -1, skip column i
/// indices.size() must equal this->NumCols(),
/// all elements of "reorder" must be in [-1, src.NumCols()-1],
/// and src.NumRows() must equal this.NumRows()
void
AddCols
(
const
MatrixBase
<
Real
>
&
src
,
const
std
::
vector
<
MatrixIndexT
>
&
indices
);
/// Applies floor to all matrix elements
void
ApplyFloor
(
Real
floor_val
);
...
...
src/nnet2/nnet-component.cc
View file @
c3f5a1da
...
...
@@ -3872,39 +3872,33 @@ void Convolutional1dComponent::Propagate(const ChunkInfo &in_info,
int32
filter_dim
=
filter_params_
.
NumCols
();
/** Buffer of reshaped inputs:
* 1row = vectorized rectangular feature patch
,
* 1row = vectorized rectangular feature patch
es
* 1col = dim over speech frames,
* std::vector-dim = patch-position
*/
std
::
vector
<
CuMatrix
<
BaseFloat
>
>
vectorized_feature_patches_
;
// prepare the buffers
if
(
vectorized_feature_patches_
.
size
()
==
0
)
{
vectorized_feature_patches_
.
resize
(
num_patches
);
}
patches_
.
Resize
(
num_frames
,
filter_dim
*
num_patches
,
kSetZero
);
// column_map is indexed by the column-index of "patches",
// and the value is the corresponding column-index of "in".
std
::
vector
<
int32
>
column_map
;
// vectorize the inputs
for
(
int32
p
=
0
;
p
<
num_patches
;
p
++
)
{
vectorized_feature_patches_
[
p
].
Resize
(
num_frames
,
filter_dim
,
kSetZero
);
// build-up a column selection mask:
std
::
vector
<
int32
>
column_mask
;
// build-up a column selection map:
for
(
int32
s
=
0
;
s
<
num_splice
;
s
++
)
{
for
(
int32
d
=
0
;
d
<
patch_dim_
;
d
++
)
{
column_ma
sk
.
push_back
(
p
*
patch_step_
+
s
*
patch_stride_
+
d
);
column_ma
p
.
push_back
(
p
*
patch_step_
+
s
*
patch_stride_
+
d
);
}
}
KALDI_ASSERT
(
column_mask
.
size
()
==
filter_dim
);
// select the columns
vectorized_feature_patches_
[
p
].
CopyCols
(
in
,
column_mask
);
}
KALDI_ASSERT
(
column_map
.
size
()
==
filter_dim
*
num_patches
);
patches_
.
CopyCols
(
in
,
column_map
);
// compute filter activations
for
(
int32
p
=
0
;
p
<
num_patches
;
p
++
)
{
CuSubMatrix
<
BaseFloat
>
tgt
(
out
->
ColRange
(
p
*
num_filters
,
num_filters
));
CuSubMatrix
<
BaseFloat
>
patch
(
patches_
.
ColRange
(
p
*
filter_dim
,
filter_dim
));
tgt
.
AddVecToRows
(
1.0
,
bias_params_
,
0.0
);
// add bias
// apply all filters
tgt
.
AddMatMat
(
1.0
,
vectorized_feature_patches_
[
p
],
kNoTrans
,
filter_params_
,
kTrans
,
1.0
);
tgt
.
AddMatMat
(
1.0
,
patch
,
kNoTrans
,
filter_params_
,
kTrans
,
1.0
);
}
}
...
...
@@ -3940,33 +3934,36 @@ void Convolutional1dComponent::Backprop(const ChunkInfo &in_info,
int32
filter_dim
=
filter_params_
.
NumCols
();
/** Buffer for backpropagation:
* derivatives in the domain of '
vectorized_feature_
patches_',
* 1row = vectorized rectangular feature patch,
* derivatives in the domain of 'patches_',
* 1row = vectorized rectangular feature patch
es
,
* 1col = dim over speech frames,
* std::vector-dim = patch-position
*/
std
::
vector
<
CuMatrix
<
BaseFloat
>
>
feature_patch_diffs_
;
feature_patch_diffs_
.
resize
(
num_patches
);
CuMatrix
<
BaseFloat
>
patches_deriv
;
patches_deriv
.
Resize
(
num_frames
,
filter_dim
*
num_patches
,
kSetZero
);
// backpropagate to vector of matrices
// (corresponding to position of a filter)
for
(
int32
p
=
0
;
p
<
num_patches
;
p
++
)
{
feature_patch_diffs_
[
p
].
Resize
(
num_frames
,
filter_dim
,
kSetZero
);
// reset
CuSubMatrix
<
BaseFloat
>
patch_deriv
(
patches_deriv
.
ColRange
(
p
*
filter_dim
,
filter_dim
));
CuSubMatrix
<
BaseFloat
>
out_deriv_patch
(
out_deriv
.
ColRange
(
p
*
num_filters
,
num_filters
));
feature_patch_diffs_
[
p
]
.
AddMatMat
(
1.0
,
out_deriv_patch
,
kNoTrans
,
filter_params_
,
kNoTrans
,
0.0
);
patch_deriv
.
AddMatMat
(
1.0
,
out_deriv_patch
,
kNoTrans
,
filter_params_
,
kNoTrans
,
0.0
);
}
// sum the derivatives into in_deriv, we will compensate #summands
for
(
int32
p
=
0
;
p
<
num_patches
;
p
++
)
{
// rearranged_column_map is indexed by the column-index c of "in",
// and contains either some column-index d of "patches" such that
// column_map[d] = c, or -1.
std
::
vector
<
int32
>
rearranged_column_map
(
InputDim
(),
-
1
);
for
(
int32
s
=
0
;
s
<
num_splice
;
s
++
)
{
CuSubMatrix
<
BaseFloat
>
src
(
feature_patch_diffs_
[
p
].
ColRange
(
s
*
patch_dim_
,
patch_dim_
));
CuSubMatrix
<
BaseFloat
>
tgt
(
in_deriv
->
ColRange
(
p
*
patch_step_
+
s
*
patch_stride_
,
patch_dim_
));
tgt
.
AddMat
(
1.0
,
src
);
// sum
for
(
int32
d
=
0
;
d
<
patch_dim_
;
d
++
)
{
rearranged_column_map
[
p
*
patch_step_
+
s
*
patch_stride_
+
d
]
=
p
*
filter_dim
+
s
*
patch_dim_
+
d
;
}
}
in_deriv
->
AddCols
(
patches_deriv
,
rearranged_column_map
);
}
if
(
to_update
!=
NULL
)
{
...
...
@@ -4090,32 +4087,22 @@ void Convolutional1dComponent::Update(const CuMatrixBase<BaseFloat> &in_value,
CuMatrix
<
BaseFloat
>
filters_grad
;
CuVector
<
BaseFloat
>
bias_grad
;
/** Buffer of reshaped inputs:
* 1row = vectorized rectangular feature patch,
* 1col = dim over speech frames,
* std::vector-dim = patch-position
*/
std
::
vector
<
CuMatrix
<
BaseFloat
>
>
vectorized_feature_patches_
;
// prepare the buffers
if
(
vectorized_feature_patches_
.
size
()
==
0
)
{
vectorized_feature_patches_
.
resize
(
num_patches
);
}
// vectorize the inputs
for
(
int32
p
=
0
;
p
<
num_patches
;
p
++
)
{
vectorized_feature_patches_
[
p
].
Resize
(
num_frames
,
filter_dim
,
kSetZero
);
// build-up a column selection mask:
std
::
vector
<
int32
>
column_mask
;
for
(
int32
s
=
0
;
s
<
num_splice
;
s
++
)
{
for
(
int32
d
=
0
;
d
<
patch_dim_
;
d
++
)
{
column_mask
.
push_back
(
p
*
patch_step_
+
s
*
patch_stride_
+
d
);
// prepare the buffers if necessary
// (necessary to pass the component test)
if
(
patches_
.
NumRows
()
==
0
)
{
std
::
vector
<
int32
>
column_map
;
patches_
.
Resize
(
num_frames
,
filter_dim
*
num_patches
,
kSetZero
);
for
(
int32
p
=
0
;
p
<
num_patches
;
p
++
)
{
for
(
int32
s
=
0
;
s
<
num_splice
;
s
++
)
{
for
(
int32
d
=
0
;
d
<
patch_dim_
;
d
++
)
{
column_map
.
push_back
(
p
*
patch_step_
+
s
*
patch_stride_
+
d
);
}
}
}
KALDI_ASSERT
(
column_mask
.
size
()
==
filter_dim
);
// select the columns
vectorized_feature_patches_
[
p
].
CopyCols
(
in_value
,
column_mask
);
KALDI_ASSERT
(
column_map
.
size
()
==
filter_dim
*
num_patches
);
patches_
.
CopyCols
(
in_value
,
column_map
);
}
KALDI_ASSERT
(
patches_
.
NumCols
()
==
filter_dim
*
num_patches
);
//
// calculate the gradient
...
...
@@ -4126,8 +4113,8 @@ void Convolutional1dComponent::Update(const CuMatrixBase<BaseFloat> &in_value,
for
(
int32
p
=
0
;
p
<
num_patches
;
p
++
)
{
// sum
CuSubMatrix
<
BaseFloat
>
diff_patch
(
out_deriv
.
ColRange
(
p
*
num_filters
,
num_filters
));
filters_grad
.
AddMatMat
(
1.0
,
diff_patch
,
kTrans
,
vectorized_feature_patches_
[
p
],
kNoTrans
,
1.0
);
CuSubMatrix
<
BaseFloat
>
patch
(
patches_
.
ColRange
(
p
*
filter_dim
,
filter_dim
));
filters_grad
.
AddMatMat
(
1.0
,
diff_patch
,
kTrans
,
patch
,
kNoTrans
,
1.0
);
bias_grad
.
AddRowSumMat
(
1.0
,
diff_patch
,
1.0
);
}
...
...
src/nnet2/nnet-component.h
View file @
c3f5a1da
...
...
@@ -1783,6 +1783,7 @@ class Convolutional1dComponent: public UpdatableComponent {
const
Convolutional1dComponent
&
operator
=
(
const
Convolutional1dComponent
&
other
);
// Disallow.
CuMatrix
<
BaseFloat
>
filter_params_
;
CuVector
<
BaseFloat
>
bias_params_
;
mutable
CuMatrix
<
BaseFloat
>
patches_
;
bool
is_gradient_
;
};
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment