Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Open sidebar
Abdelwahab HEBA
kaldi_2015
Commits
87337b60
Commit
87337b60
authored
Jul 28, 2015
by
naxingyu
Browse files
update Maxout GPU-CPU and test code
parent
4881b72e
Changes
10
Hide whitespace changes
Inline
Side-by-side
Showing
10 changed files
with
279 additions
and
28 deletions
+279
-28
src/cudamatrix/cu-kernels-ansi.h
src/cudamatrix/cu-kernels-ansi.h
+4
-0
src/cudamatrix/cu-kernels.cu
src/cudamatrix/cu-kernels.cu
+60
-0
src/cudamatrix/cu-kernels.h
src/cudamatrix/cu-kernels.h
+4
-0
src/cudamatrix/cu-matrix-speed-test.cc
src/cudamatrix/cu-matrix-speed-test.cc
+37
-0
src/cudamatrix/cu-matrix-test.cc
src/cudamatrix/cu-matrix-test.cc
+56
-0
src/cudamatrix/cu-matrix.cc
src/cudamatrix/cu-matrix.cc
+47
-0
src/cudamatrix/cu-matrix.h
src/cudamatrix/cu-matrix.h
+16
-0
src/matrix/kaldi-matrix.cc
src/matrix/kaldi-matrix.cc
+40
-0
src/matrix/kaldi-matrix.h
src/matrix/kaldi-matrix.h
+12
-1
src/nnet2/nnet-component.cc
src/nnet2/nnet-component.cc
+3
-27
No files found.
src/cudamatrix/cu-kernels-ansi.h
View file @
87337b60
...
...
@@ -80,6 +80,7 @@ void cudaF_mul_cols_vec(dim3 Gr, dim3 Bl, float *mat, const float *scale, Matrix
void
cudaF_mul_rows_vec
(
dim3
Gr
,
dim3
Bl
,
float
*
mat
,
const
float
*
scale
,
MatrixDim
d
);
void
cudaF_mul_rows_group_mat
(
dim3
Gr
,
dim3
Bl
,
float
*
y
,
const
float
*
x
,
MatrixDim
d
,
int
src_stride
,
int
group_size
);
void
cudaF_calc_pnorm_deriv
(
dim3
Gr
,
dim3
Bl
,
float
*
y
,
const
float
*
x1
,
const
float
*
x2
,
MatrixDim
d
,
int
src_stride
,
int
group_size
,
float
power
);
void
cudaF_calc_group_max_deriv
(
dim3
Gr
,
dim3
Bl
,
float
*
y
,
const
float
*
x1
,
const
float
*
x2
,
MatrixDim
d
,
int
src_stride
,
int
group_size
);
void
cudaF_div_rows_vec
(
dim3
Gr
,
dim3
Bl
,
float
*
mat
,
const
float
*
vec_div
,
MatrixDim
d
);
void
cudaF_add_mat
(
dim3
Gr
,
dim3
Bl
,
float
alpha
,
const
float
*
src
,
float
*
dst
,
MatrixDim
d
,
int
src_stride
,
int
A_trans
);
void
cudaF_add_mat_mat_div_mat
(
dim3
Gr
,
dim3
Bl
,
const
float
*
A
,
const
float
*
B
,
const
float
*
C
,
float
*
dst
,
MatrixDim
d
,
int
stride_a
,
int
stride_b
,
int
stride_c
);
...
...
@@ -131,6 +132,7 @@ void cudaF_softmax_reduce(size_t Gr, size_t Bl, float *y, const float *x, Matrix
void
cudaF_log_softmax_reduce
(
size_t
Gr
,
size_t
Bl
,
float
*
y
,
const
float
*
x
,
MatrixDim
d
,
int
src_stride
);
void
cudaF_soft_hinge
(
dim3
Gr
,
dim3
Bl
,
float
*
y
,
const
float
*
x
,
MatrixDim
d
,
int
src_stride
);
void
cudaF_group_pnorm
(
dim3
Gr
,
dim3
Bl
,
float
*
y
,
const
float
*
x
,
MatrixDim
d
,
int
src_stride
,
int
group_size
,
float
power
);
void
cudaF_group_max
(
dim3
Gr
,
dim3
Bl
,
float
*
y
,
const
float
*
x
,
MatrixDim
d
,
int
src_stride
,
int
group_size
);
void
cudaF_sigmoid
(
dim3
Gr
,
dim3
Bl
,
float
*
y
,
const
float
*
x
,
MatrixDim
d
,
int
src_stride
);
void
cudaF_diff_sigmoid
(
dim3
Gr
,
dim3
Bl
,
float
*
eout
,
const
float
*
e
,
const
float
*
y
,
MatrixDim
d
,
int
e_stride
,
int
y_stride
);
void
cudaF_tanh
(
dim3
Gr
,
dim3
Bl
,
float
*
y
,
const
float
*
x
,
MatrixDim
d
,
int
src_stride
);
...
...
@@ -206,6 +208,7 @@ void cudaD_mul_cols_vec(dim3 Gr, dim3 Bl, double *mat, const double *scale, Matr
void
cudaD_mul_rows_vec
(
dim3
Gr
,
dim3
Bl
,
double
*
mat
,
const
double
*
scale
,
MatrixDim
d
);
void
cudaD_mul_rows_group_mat
(
dim3
Gr
,
dim3
Bl
,
double
*
y
,
const
double
*
x
,
MatrixDim
d
,
int
src_stride
,
int
group_size
);
void
cudaD_calc_pnorm_deriv
(
dim3
Gr
,
dim3
Bl
,
double
*
y
,
const
double
*
x1
,
const
double
*
x2
,
MatrixDim
d
,
int
src_stride
,
int
group_size
,
double
power
);
void
cudaD_calc_group_max_deriv
(
dim3
Gr
,
dim3
Bl
,
double
*
y
,
const
double
*
x1
,
const
double
*
x2
,
MatrixDim
d
,
int
src_stride
,
int
group_size
);
void
cudaD_div_rows_vec
(
dim3
Gr
,
dim3
Bl
,
double
*
mat
,
const
double
*
vec_div
,
MatrixDim
d
);
void
cudaD_add_mat
(
dim3
Gr
,
dim3
Bl
,
double
alpha
,
const
double
*
src
,
double
*
dst
,
MatrixDim
d
,
int
src_stride
,
int
A_trans
);
void
cudaD_add_mat_mat_div_mat
(
dim3
Gr
,
dim3
Bl
,
const
double
*
A
,
const
double
*
B
,
const
double
*
C
,
double
*
dst
,
MatrixDim
d
,
int
stride_a
,
int
stride_b
,
int
stride_c
);
...
...
@@ -260,6 +263,7 @@ void cudaD_softmax_reduce(size_t Gr, size_t Bl, double *y, const double *x, Matr
void
cudaD_log_softmax_reduce
(
size_t
Gr
,
size_t
Bl
,
double
*
y
,
const
double
*
x
,
MatrixDim
d
,
int
src_stride
);
void
cudaD_soft_hinge
(
dim3
Gr
,
dim3
Bl
,
double
*
y
,
const
double
*
x
,
MatrixDim
d
,
int
src_stride
);
void
cudaD_group_pnorm
(
dim3
Gr
,
dim3
Bl
,
double
*
y
,
const
double
*
x
,
MatrixDim
d
,
int
src_stride
,
int
group_size
,
double
power
);
void
cudaD_group_max
(
dim3
Gr
,
dim3
Bl
,
double
*
y
,
const
double
*
x
,
MatrixDim
d
,
int
src_stride
,
int
group_size
);
void
cudaD_sigmoid
(
dim3
Gr
,
dim3
Bl
,
double
*
y
,
const
double
*
x
,
MatrixDim
d
,
int
src_stride
);
void
cudaD_diff_sigmoid
(
dim3
Gr
,
dim3
Bl
,
double
*
eout
,
const
double
*
e
,
const
double
*
y
,
MatrixDim
d
,
int
e_stride
,
int
y_stride
);
void
cudaD_tanh
(
dim3
Gr
,
dim3
Bl
,
double
*
y
,
const
double
*
x
,
MatrixDim
d
,
int
src_stride
);
...
...
src/cudamatrix/cu-kernels.cu
View file @
87337b60
...
...
@@ -474,6 +474,24 @@ static void _calc_pnorm_deriv(Real *deriv, const Real *vec, const Real *norm,
}
}
/// deriv is the derivative we will output; vec is the input we're computing
/// the group max on, "maxv" is the previously computed group max.
template
<
typename
Real
>
__global__
static
void
_calc_group_max_deriv
(
Real
*
deriv
,
const
Real
*
vec
,
const
Real
*
maxv
,
MatrixDim
d
,
int
src_stride
,
int
group_size
)
{
int
i
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
int
j
=
blockIdx
.
y
*
blockDim
.
y
+
threadIdx
.
y
;
if
(
j
<
d
.
rows
&&
i
<
d
.
cols
)
{
int
dst_index
=
i
+
j
*
d
.
stride
,
src_index
=
i
/
group_size
+
j
*
src_stride
;
Real
vec_element
=
vec
[
dst_index
],
// this is the element of the original vector.
max_element
=
maxv
[
src_index
];
// this is the max value
Real
ans
=
(
max_element
==
vec_element
?
1.0
:
0.0
);
deriv
[
dst_index
]
=
ans
;
}
}
/// Set each element to y = (x == orig ? changed : x).
template
<
typename
Real
>
__global__
...
...
@@ -1521,6 +1539,27 @@ static void _group_pnorm(Real *y, const Real *x, MatrixDim d, int src_stride,
}
}
}
template
<
typename
Real
>
__global__
static
void
_group_max
(
Real
*
y
,
const
Real
*
x
,
MatrixDim
d
,
int
src_stride
,
int
group_size
)
{
int
i
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
int
j
=
blockIdx
.
y
*
blockDim
.
y
+
threadIdx
.
y
;
if
(
j
<
d
.
rows
&&
i
<
d
.
cols
)
{
int
dst_index
=
i
+
j
*
d
.
stride
;
int
src_begin_index
=
i
*
group_size
+
j
*
src_stride
;
Real
max_value
=
-
1e20
;
int
src_end_index
=
src_begin_index
+
group_size
;
for
(
int
src_index
=
src_begin_index
;
src_index
<
src_end_index
;
src_index
++
)
{
if
(
!
isnan
(
x
[
src_index
])
&&
x
[
src_index
]
>
max_value
)
max_value
=
x
[
src_index
];
}
y
[
dst_index
]
=
max_value
;
}
}
/*
* cu::
*/
...
...
@@ -2060,6 +2099,12 @@ void cudaF_calc_pnorm_deriv(dim3 Gr, dim3 Bl, float *y, const float *x1,
_calc_pnorm_deriv
<<<
Gr
,
Bl
>>>
(
y
,
x1
,
x2
,
d
,
src_stride
,
group_size
,
power
);
}
void
cudaF_calc_group_max_deriv
(
dim3
Gr
,
dim3
Bl
,
float
*
y
,
const
float
*
x1
,
const
float
*
x2
,
MatrixDim
d
,
int
src_stride
,
int
group_size
)
{
_calc_group_max_deriv
<<<
Gr
,
Bl
>>>
(
y
,
x1
,
x2
,
d
,
src_stride
,
group_size
);
}
void
cudaF_div_rows_vec
(
dim3
Gr
,
dim3
Bl
,
float
*
mat
,
const
float
*
vec_div
,
MatrixDim
d
)
{
_div_rows_vec
<<<
Gr
,
Bl
>>>
(
mat
,
vec_div
,
d
);
}
...
...
@@ -2234,6 +2279,10 @@ void cudaF_group_pnorm(dim3 Gr, dim3 Bl, float *y, const float *x, MatrixDim d,
_group_pnorm
<<<
Gr
,
Bl
>>>
(
y
,
x
,
d
,
src_stride
,
group_size
,
power
);
}
void
cudaF_group_max
(
dim3
Gr
,
dim3
Bl
,
float
*
y
,
const
float
*
x
,
MatrixDim
d
,
int
src_stride
,
int
group_size
)
{
_group_max
<<<
Gr
,
Bl
>>>
(
y
,
x
,
d
,
src_stride
,
group_size
);
}
void
cudaF_sigmoid
(
dim3
Gr
,
dim3
Bl
,
float
*
y
,
const
float
*
x
,
MatrixDim
d
,
int
src_stride
)
{
_sigmoid
<<<
Gr
,
Bl
>>>
(
y
,
x
,
d
,
src_stride
);
}
...
...
@@ -2471,6 +2520,12 @@ void cudaD_calc_pnorm_deriv(dim3 Gr, dim3 Bl, double*y, const double* x1,
_calc_pnorm_deriv
<<<
Gr
,
Bl
>>>
(
y
,
x1
,
x2
,
d
,
src_stride
,
group_size
,
power
);
}
void
cudaD_calc_group_max_deriv
(
dim3
Gr
,
dim3
Bl
,
double
*
y
,
const
double
*
x1
,
const
double
*
x2
,
MatrixDim
d
,
int
src_stride
,
int
group_size
)
{
_calc_group_max_deriv
<<<
Gr
,
Bl
>>>
(
y
,
x1
,
x2
,
d
,
src_stride
,
group_size
);
}
void
cudaD_div_rows_vec
(
dim3
Gr
,
dim3
Bl
,
double
*
mat
,
const
double
*
vec_div
,
MatrixDim
d
)
{
_div_rows_vec
<<<
Gr
,
Bl
>>>
(
mat
,
vec_div
,
d
);
}
...
...
@@ -2645,6 +2700,11 @@ void cudaD_group_pnorm(dim3 Gr, dim3 Bl, double* y, const double* x, MatrixDim d
_group_pnorm
<<<
Gr
,
Bl
>>>
(
y
,
x
,
d
,
src_stride
,
group_size
,
power
);
}
void
cudaD_group_max
(
dim3
Gr
,
dim3
Bl
,
double
*
y
,
const
double
*
x
,
MatrixDim
d
,
int
src_stride
,
int
group_size
)
{
_group_max
<<<
Gr
,
Bl
>>>
(
y
,
x
,
d
,
src_stride
,
group_size
);
}
void
cudaD_sigmoid
(
dim3
Gr
,
dim3
Bl
,
double
*
y
,
const
double
*
x
,
MatrixDim
d
,
int
src_stride
)
{
_sigmoid
<<<
Gr
,
Bl
>>>
(
y
,
x
,
d
,
src_stride
);
}
...
...
src/cudamatrix/cu-kernels.h
View file @
87337b60
...
...
@@ -116,6 +116,7 @@ inline void cuda_mul_cols_vec(dim3 Gr, dim3 Bl, float *mat, const float *scale,
inline
void
cuda_mul_rows_vec
(
dim3
Gr
,
dim3
Bl
,
float
*
mat
,
const
float
*
scale
,
MatrixDim
d
)
{
cudaF_mul_rows_vec
(
Gr
,
Bl
,
mat
,
scale
,
d
);
}
inline
void
cuda_mul_rows_group_mat
(
dim3
Gr
,
dim3
Bl
,
float
*
y
,
const
float
*
x
,
MatrixDim
d
,
int
src_stride
,
int
group_size
)
{
cudaF_mul_rows_group_mat
(
Gr
,
Bl
,
y
,
x
,
d
,
src_stride
,
group_size
);
}
inline
void
cuda_calc_pnorm_deriv
(
dim3
Gr
,
dim3
Bl
,
float
*
y
,
const
float
*
x1
,
const
float
*
x2
,
MatrixDim
d
,
int
src_stride
,
int
group_size
,
float
power
)
{
cudaF_calc_pnorm_deriv
(
Gr
,
Bl
,
y
,
x1
,
x2
,
d
,
src_stride
,
group_size
,
power
);
}
inline
void
cuda_calc_group_max_deriv
(
dim3
Gr
,
dim3
Bl
,
float
*
y
,
const
float
*
x1
,
const
float
*
x2
,
MatrixDim
d
,
int
src_stride
,
int
group_size
)
{
cudaF_calc_group_max_deriv
(
Gr
,
Bl
,
y
,
x1
,
x2
,
d
,
src_stride
,
group_size
);
}
inline
void
cuda_add_mat
(
dim3
Gr
,
dim3
Bl
,
float
alpha
,
const
float
*
src
,
float
*
dst
,
MatrixDim
d
,
int
src_stride
,
int
A_trans
)
{
cudaF_add_mat
(
Gr
,
Bl
,
alpha
,
src
,
dst
,
d
,
src_stride
,
A_trans
);
}
inline
void
cuda_add_mat_mat_div_mat
(
dim3
Gr
,
dim3
Bl
,
const
float
*
A
,
const
float
*
B
,
const
float
*
C
,
float
*
dst
,
MatrixDim
d
,
int
stride_a
,
int
stride_b
,
int
stride_c
)
{
cudaF_add_mat_mat_div_mat
(
Gr
,
Bl
,
A
,
B
,
C
,
dst
,
d
,
stride_a
,
stride_b
,
stride_c
);
}
inline
void
cuda_add_vec_to_cols
(
dim3
Gr
,
dim3
Bl
,
float
alpha
,
const
float
*
col
,
float
beta
,
float
*
dst
,
MatrixDim
d
)
{
cudaF_add_vec_to_cols
(
Gr
,
Bl
,
alpha
,
col
,
beta
,
dst
,
d
);
}
...
...
@@ -182,6 +183,7 @@ inline void cuda_block_add_mat_mat(dim3 Gr, dim3 Bl, CuBlockMatrixData *B_cu_dat
*/
inline
void
cuda_soft_hinge
(
dim3
Gr
,
dim3
Bl
,
float
*
y
,
const
float
*
x
,
MatrixDim
d
,
int
src_stride
)
{
cudaF_soft_hinge
(
Gr
,
Bl
,
y
,
x
,
d
,
src_stride
);
}
inline
void
cuda_group_pnorm
(
dim3
Gr
,
dim3
Bl
,
float
*
y
,
const
float
*
x
,
MatrixDim
d
,
int
src_stride
,
int
group_size
,
float
power
)
{
cudaF_group_pnorm
(
Gr
,
Bl
,
y
,
x
,
d
,
src_stride
,
group_size
,
power
);}
inline
void
cuda_group_max
(
dim3
Gr
,
dim3
Bl
,
float
*
y
,
const
float
*
x
,
MatrixDim
d
,
int
src_stride
,
int
group_size
)
{
cudaF_group_max
(
Gr
,
Bl
,
y
,
x
,
d
,
src_stride
,
group_size
);}
inline
void
cuda_sigmoid
(
dim3
Gr
,
dim3
Bl
,
float
*
y
,
const
float
*
x
,
MatrixDim
d
,
int
src_stride
)
{
cudaF_sigmoid
(
Gr
,
Bl
,
y
,
x
,
d
,
src_stride
);
}
inline
void
cuda_diff_sigmoid
(
dim3
Gr
,
dim3
Bl
,
float
*
eout
,
const
float
*
e
,
const
float
*
y
,
MatrixDim
d
,
int
e_stride
,
int
y_stride
)
{
cudaF_diff_sigmoid
(
Gr
,
Bl
,
eout
,
e
,
y
,
d
,
e_stride
,
y_stride
);
}
inline
void
cuda_tanh
(
dim3
Gr
,
dim3
Bl
,
float
*
y
,
const
float
*
x
,
MatrixDim
d
,
int
src_stride
)
{
cudaF_tanh
(
Gr
,
Bl
,
y
,
x
,
d
,
src_stride
);
}
...
...
@@ -281,6 +283,7 @@ inline void cuda_mul_cols_vec(dim3 Gr, dim3 Bl, double *mat, const double *scale
inline
void
cuda_mul_rows_vec
(
dim3
Gr
,
dim3
Bl
,
double
*
mat
,
const
double
*
scale
,
MatrixDim
d
)
{
cudaD_mul_rows_vec
(
Gr
,
Bl
,
mat
,
scale
,
d
);
}
inline
void
cuda_mul_rows_group_mat
(
dim3
Gr
,
dim3
Bl
,
double
*
y
,
const
double
*
x
,
MatrixDim
d
,
int
src_stride
,
int
group_size
)
{
cudaD_mul_rows_group_mat
(
Gr
,
Bl
,
y
,
x
,
d
,
src_stride
,
group_size
);
}
inline
void
cuda_calc_pnorm_deriv
(
dim3
Gr
,
dim3
Bl
,
double
*
y
,
const
double
*
x1
,
const
double
*
x2
,
MatrixDim
d
,
int
src_stride
,
int
group_size
,
double
power
)
{
cudaD_calc_pnorm_deriv
(
Gr
,
Bl
,
y
,
x1
,
x2
,
d
,
src_stride
,
group_size
,
power
);
}
inline
void
cuda_calc_group_max_deriv
(
dim3
Gr
,
dim3
Bl
,
double
*
y
,
const
double
*
x1
,
const
double
*
x2
,
MatrixDim
d
,
int
src_stride
,
int
group_size
)
{
cudaD_calc_group_max_deriv
(
Gr
,
Bl
,
y
,
x1
,
x2
,
d
,
src_stride
,
group_size
);
}
inline
void
cuda_add_mat
(
dim3
Gr
,
dim3
Bl
,
double
alpha
,
const
double
*
src
,
double
*
dst
,
MatrixDim
d
,
int
src_stride
,
int
A_trans
)
{
cudaD_add_mat
(
Gr
,
Bl
,
alpha
,
src
,
dst
,
d
,
src_stride
,
A_trans
);
}
inline
void
cuda_add_mat_mat_div_mat
(
dim3
Gr
,
dim3
Bl
,
const
double
*
A
,
const
double
*
B
,
const
double
*
C
,
double
*
dst
,
MatrixDim
d
,
int
stride_a
,
int
stride_b
,
int
stride_c
)
{
cudaD_add_mat_mat_div_mat
(
Gr
,
Bl
,
A
,
B
,
C
,
dst
,
d
,
stride_a
,
stride_b
,
stride_c
);
}
inline
void
cuda_add_vec_to_cols
(
dim3
Gr
,
dim3
Bl
,
double
alpha
,
const
double
*
col
,
double
beta
,
double
*
dst
,
MatrixDim
d
)
{
cudaD_add_vec_to_cols
(
Gr
,
Bl
,
alpha
,
col
,
beta
,
dst
,
d
);
}
...
...
@@ -343,6 +346,7 @@ inline void cuda_block_add_mat_mat(dim3 Gr, dim3 Bl, CuBlockMatrixData *B_cu_dat
*/
inline
void
cuda_soft_hinge
(
dim3
Gr
,
dim3
Bl
,
double
*
y
,
const
double
*
x
,
MatrixDim
d
,
int
src_stride
)
{
cudaD_soft_hinge
(
Gr
,
Bl
,
y
,
x
,
d
,
src_stride
);
}
inline
void
cuda_group_pnorm
(
dim3
Gr
,
dim3
Bl
,
double
*
y
,
const
double
*
x
,
MatrixDim
d
,
int
src_stride
,
int
group_size
,
double
power
)
{
cudaD_group_pnorm
(
Gr
,
Bl
,
y
,
x
,
d
,
src_stride
,
group_size
,
power
);
}
inline
void
cuda_group_max
(
dim3
Gr
,
dim3
Bl
,
double
*
y
,
const
double
*
x
,
MatrixDim
d
,
int
src_stride
,
int
group_size
)
{
cudaD_group_max
(
Gr
,
Bl
,
y
,
x
,
d
,
src_stride
,
group_size
);
}
inline
void
cuda_sigmoid
(
dim3
Gr
,
dim3
Bl
,
double
*
y
,
const
double
*
x
,
MatrixDim
d
,
int
src_stride
)
{
cudaD_sigmoid
(
Gr
,
Bl
,
y
,
x
,
d
,
src_stride
);
}
inline
void
cuda_diff_sigmoid
(
dim3
Gr
,
dim3
Bl
,
double
*
eout
,
const
double
*
e
,
const
double
*
y
,
MatrixDim
d
,
int
e_stride
,
int
y_stride
)
{
cudaD_diff_sigmoid
(
Gr
,
Bl
,
eout
,
e
,
y
,
d
,
e_stride
,
y_stride
);
}
inline
void
cuda_tanh
(
dim3
Gr
,
dim3
Bl
,
double
*
y
,
const
double
*
x
,
MatrixDim
d
,
int
src_stride
)
{
cudaD_tanh
(
Gr
,
Bl
,
y
,
x
,
d
,
src_stride
);
}
...
...
src/cudamatrix/cu-matrix-speed-test.cc
View file @
87337b60
...
...
@@ -311,6 +311,39 @@ template<typename Real> void TestCuMatrixGroupPnormDeriv(int32 dim) {
<<
dim
<<
", speed was "
<<
gflops
<<
" gigaflops."
;
}
template
<
typename
Real
>
void
TestCuMatrixGroupMax
(
int32
dim
)
{
BaseFloat
time_in_secs
=
0.025
;
int32
group_size
=
4
;
CuMatrix
<
Real
>
M
(
dim
,
dim
),
N
(
dim
,
dim
/
group_size
);
M
.
SetRandn
();
Timer
tim
;
int32
iter
=
0
;
for
(;
tim
.
Elapsed
()
<
time_in_secs
;
iter
++
)
N
.
GroupMax
(
M
);
BaseFloat
fdim
=
dim
;
BaseFloat
gflops
=
(
fdim
*
fdim
*
iter
)
/
(
tim
.
Elapsed
()
*
1.0e+09
);
KALDI_LOG
<<
"For CuMatrix::GroupMax"
<<
NameOf
<
Real
>
()
<<
", for dim = "
<<
dim
<<
", speed was "
<<
gflops
<<
" gigaflops."
;
}
template
<
typename
Real
>
void
TestCuMatrixGroupMaxDeriv
(
int32
dim
)
{
BaseFloat
time_in_secs
=
0.025
;
int32
group_size
=
4
;
CuMatrix
<
Real
>
M
(
dim
,
dim
),
N
(
dim
,
dim
/
group_size
),
O
(
dim
,
dim
);
M
.
SetRandn
();
N
.
GroupMax
(
M
);
Timer
tim
;
int32
iter
=
0
;
for
(;
tim
.
Elapsed
()
<
time_in_secs
;
iter
++
)
O
.
GroupMaxDeriv
(
M
,
N
);
BaseFloat
fdim
=
dim
;
BaseFloat
gflops
=
(
fdim
*
fdim
*
iter
)
/
(
tim
.
Elapsed
()
*
1.0e+09
);
KALDI_LOG
<<
"For CuMatrix::GroupMaxDeriv"
<<
NameOf
<
Real
>
()
<<
", for dim = "
<<
dim
<<
", speed was "
<<
gflops
<<
" gigaflops."
;
}
template
<
typename
Real
>
void
TestCuMatrixTraceMatMat
(
int32
dim
)
{
for
(
int32
n
=
0
;
n
<
2
;
n
++
)
{
...
...
@@ -517,6 +550,10 @@ template<typename Real> void CudaMatrixSpeedTest() {
TestCuMatrixGroupPnorm
<
Real
>
(
sizes
[
s
]);
for
(
int32
s
=
0
;
s
<
ns
;
s
++
)
TestCuMatrixGroupPnormDeriv
<
Real
>
(
sizes
[
s
]);
for
(
int32
s
=
0
;
s
<
ns
;
s
++
)
TestCuMatrixGroupMax
<
Real
>
(
sizes
[
s
]);
for
(
int32
s
=
0
;
s
<
ns
;
s
++
)
TestCuMatrixGroupMaxDeriv
<
Real
>
(
sizes
[
s
]);
for
(
int32
s
=
0
;
s
<
ns
;
s
++
)
TestCuMatrixTraceMatMat
<
Real
>
(
sizes
[
s
]);
for
(
int32
s
=
0
;
s
<
ns
;
s
++
)
...
...
src/cudamatrix/cu-matrix-test.cc
View file @
87337b60
...
...
@@ -266,6 +266,27 @@ static void UnitTestCuMatrixGroupPnorm() {
}
}
template
<
typename
Real
>
static
void
UnitTestCuMatrixGroupMax
()
{
int32
M
=
100
+
Rand
()
%
200
,
N
=
100
+
Rand
()
%
200
;
// M = 256; N = 256;
for
(
int32
K
=
5
;
K
<
7
;
K
++
)
{
int32
N_src
=
N
*
K
;
Matrix
<
Real
>
H_src
(
M
,
N_src
);
H_src
.
SetRandn
();
if
(
rand
()
%
2
==
0
)
H_src
.
ApplyFloor
(
0.0
);
// will put some zeros in the matrix.. harder to
// do derivatives.
Matrix
<
Real
>
H
(
M
,
N
);
H
.
GroupMax
(
H_src
);
CuMatrix
<
Real
>
D
(
H_src
);
CuMatrix
<
Real
>
E
(
M
,
N
);
E
.
GroupMax
(
D
);
Matrix
<
Real
>
H2
(
E
);
AssertEqual
(
H
,
H2
);
}
}
template
<
typename
Real
>
static
void
UnitTestCuMatrixSet
()
{
for
(
int32
i
=
0
;
i
<
2
;
i
++
)
{
...
...
@@ -686,6 +707,39 @@ static void UnitTestCuMatrixGroupPnormDeriv() {
AssertEqual
(
Hr
,
Hr2
);
}
template
<
typename
Real
>
static
void
UnitTestCuMatrixGroupMaxDeriv
()
{
int32
dimM
=
100
+
Rand
()
%
200
,
dimNs
=
100
+
Rand
()
%
200
;
int32
group_size
=
1
+
Rand
()
%
10
;
// int32 dimM = 256, dimNs = 2;
// int32 group_size = 2;
int32
dimN
=
group_size
*
dimNs
;
Matrix
<
Real
>
Hm
(
dimM
,
dimN
);
Matrix
<
Real
>
Hr
(
dimM
,
dimN
);
Matrix
<
Real
>
Hs
(
dimM
,
dimNs
);
Hs
.
SetRandn
();
if
(
rand
()
%
2
==
0
)
Hm
.
ApplyFloor
(
0.0
);
// will put some zeros in the matrix.. harder to
// do derivatives.
Hs
.
GroupMax
(
Hm
);
CuMatrix
<
Real
>
Dm
(
dimM
,
dimN
);
CuMatrix
<
Real
>
Dr
(
dimM
,
dimN
);
CuMatrix
<
Real
>
Ds
(
dimM
,
dimNs
);
Dm
.
CopyFromMat
(
Hm
);
Dr
.
CopyFromMat
(
Hr
);
Ds
.
CopyFromMat
(
Hs
);
// KALDI_LOG << "Hr " << Hr << " Dr " << Dr << "Ds" << Ds << " Hs " << Hs ;
Dr
.
GroupMaxDeriv
(
Dm
,
Ds
);
Hr
.
GroupMaxDeriv
(
Hm
,
Hs
);
// KALDI_LOG << "Hr " << Hr << " Dr " << Dr << "Ds" << Ds << " Hs " << Hs ;
Matrix
<
Real
>
Hr2
(
dimM
,
dimN
);
Dr
.
CopyToMat
(
&
Hr2
);
AssertEqual
(
Hr
,
Hr2
);
}
template
<
typename
Real
>
static
void
UnitTestCuMatrixAddDiagVecMat
()
{
for
(
int
p
=
0
;
p
<
4
;
p
++
)
{
MatrixIndexT
dimM
=
100
+
Rand
()
%
255
,
dimN
=
100
+
Rand
()
%
255
;
...
...
@@ -2072,6 +2126,8 @@ template<typename Real> void CudaMatrixUnitTest() {
UnitTestCuDiffSigmoid
<
Real
>
();
UnitTestCuMatrixGroupPnorm
<
Real
>
();
UnitTestCuMatrixGroupPnormDeriv
<
Real
>
();
UnitTestCuMatrixGroupMax
<
Real
>
();
UnitTestCuMatrixGroupMaxDeriv
<
Real
>
();
UnitTestCuMatrixMulRowsVec
<
Real
>
();
UnitTestCuMatrixMulRowsGroupMat
<
Real
>
();
UnitTestCuFindRowMaxId
<
Real
>
();
...
...
src/cudamatrix/cu-matrix.cc
View file @
87337b60
...
...
@@ -759,6 +759,32 @@ void CuMatrixBase<Real>::GroupPnormDeriv(const CuMatrixBase<Real> &src1,
}
}
template
<
typename
Real
>
void
CuMatrixBase
<
Real
>::
GroupMaxDeriv
(
const
CuMatrixBase
<
Real
>
&
src1
,
const
CuMatrixBase
<
Real
>
&
src2
)
{
KALDI_ASSERT
(
src2
.
NumCols
()
>
0
);
int
group_size
=
this
->
NumCols
()
/
src2
.
NumCols
();
KALDI_ASSERT
(
this
->
NumCols
()
==
src2
.
NumCols
()
*
group_size
);
#if HAVE_CUDA == 1
if
(
CuDevice
::
Instantiate
().
Enabled
())
{
Timer
tim
;
dim3
dimBlock
(
CU2DBLOCK
,
CU2DBLOCK
);
dim3
dimGrid
(
n_blocks
(
NumCols
(),
CU2DBLOCK
),
n_blocks
(
NumRows
(),
CU2DBLOCK
));
cuda_calc_group_max_deriv
(
dimGrid
,
dimBlock
,
this
->
data_
,
src1
.
Data
(),
src2
.
Data
(),
Dim
(),
src2
.
Stride
(),
group_size
);
CU_SAFE_CALL
(
cudaGetLastError
());
CuDevice
::
Instantiate
().
AccuProfile
(
__func__
,
tim
.
Elapsed
());
}
else
#endif
{
Mat
().
GroupMaxDeriv
(
src1
.
Mat
(),
src2
.
Mat
());
}
}
template
<
typename
Real
>
void
CuMatrixBase
<
Real
>::
DivRowsVec
(
const
CuVectorBase
<
Real
>
&
div
)
{
#if HAVE_CUDA == 1
...
...
@@ -1137,6 +1163,27 @@ void CuMatrixBase<Real>::GroupPnorm(const CuMatrixBase<Real> &src, Real power) {
}
}
template
<
typename
Real
>
void
CuMatrixBase
<
Real
>::
GroupMax
(
const
CuMatrixBase
<
Real
>
&
src
)
{
int
group_size
=
src
.
NumCols
()
/
this
->
NumCols
();
KALDI_ASSERT
(
src
.
NumCols
()
==
this
->
NumCols
()
*
group_size
&&
this
->
NumRows
()
==
src
.
NumRows
());
#if HAVE_CUDA == 1
if
(
CuDevice
::
Instantiate
().
Enabled
())
{
Timer
tim
;
dim3
dimBlock
(
CU2DBLOCK
,
CU2DBLOCK
);
dim3
dimGrid
(
n_blocks
(
src
.
NumCols
(),
CU2DBLOCK
),
n_blocks
(
src
.
NumRows
(),
CU2DBLOCK
));
cuda_group_max
(
dimGrid
,
dimBlock
,
this
->
data_
,
src
.
data_
,
this
->
Dim
(),
src
.
Stride
(),
group_size
);
CU_SAFE_CALL
(
cudaGetLastError
());
CuDevice
::
Instantiate
().
AccuProfile
(
__func__
,
tim
.
Elapsed
());
}
else
#endif
{
Mat
().
GroupMax
(
src
.
Mat
());
}
}
/*
Think of sv_labels as a Matrix, denoting the "correct" label of each frame to
each phone-state; it's very likely to contain a LOT of zeros
...
...
src/cudamatrix/cu-matrix.h
View file @
87337b60
...
...
@@ -197,6 +197,22 @@ class CuMatrixBase {
/// "output-elem" is whichever element of output depends on that input element.
void
GroupPnormDeriv
(
const
CuMatrixBase
<
Real
>
&
input
,
const
CuMatrixBase
<
Real
>
&
output
,
Real
power
);
/// Apply the function y(i) = (max_{j = i*G}^{(i+1)*G-1} x_j
/// where G = x.NumCols() / y.NumCols() must be an integer.
/// [note: y corresponds to *this and x to src, so
/// src.NumCols() / this->NumCols() must be an integer.
void
GroupMax
(
const
CuMatrixBase
<
Real
>
&
src
);
/// Calculate derivatives for the GroupMax function above, where
/// "input" is the input to the GroupMax function above (i.e. the "src" variable),
/// and "output" is the result of the computation (i.e. the "this" of that function
/// call), and *this must have the same dimension as "input". Each element
/// of *this will be set to 1 if the corresponding input equals the output of
/// the group, and 0 otherwise. The equals the function derivative where it is
/// defined (it's not defined where multiple inputs in the group are equal to the output).
void
GroupMaxDeriv
(
const
CuMatrixBase
<
Real
>
&
input
,
const
CuMatrixBase
<
Real
>
&
output
);
/// Compute the hyperbolic tangent (tanh) function; element by element,
/// *this = tanh(src).
...
...
src/matrix/kaldi-matrix.cc
View file @
87337b60
...
...
@@ -1110,6 +1110,26 @@ void MatrixBase<Real>::GroupPnormDeriv(const MatrixBase<Real> &input,
}
}
template
<
typename
Real
>
void
MatrixBase
<
Real
>::
GroupMaxDeriv
(
const
MatrixBase
<
Real
>
&
input
,
const
MatrixBase
<
Real
>
&
output
)
{
KALDI_ASSERT
(
input
.
NumCols
()
==
this
->
NumCols
()
&&
input
.
NumRows
()
==
this
->
NumRows
());
KALDI_ASSERT
(
this
->
NumCols
()
%
output
.
NumCols
()
==
0
&&
this
->
NumRows
()
==
output
.
NumRows
());
int
group_size
=
this
->
NumCols
()
/
output
.
NumCols
(),
num_rows
=
this
->
NumRows
(),
num_cols
=
this
->
NumCols
();
for
(
MatrixIndexT
i
=
0
;
i
<
num_rows
;
i
++
)
{
for
(
MatrixIndexT
j
=
0
;
j
<
num_cols
;
j
++
)
{
Real
input_val
=
input
(
i
,
j
);
Real
output_val
=
output
(
i
,
j
/
group_size
);
(
*
this
)(
i
,
j
)
=
(
input_val
==
output_val
?
1
:
0
);
}
}
}
template
<
typename
Real
>
// scales each column by scale[i].
void
MatrixBase
<
Real
>::
MulColsVec
(
const
VectorBase
<
Real
>
&
scale
)
{
KALDI_ASSERT
(
scale
.
Dim
()
==
num_cols_
);
...
...
@@ -2499,6 +2519,26 @@ void MatrixBase<Real>::GroupPnorm(const MatrixBase<Real> &src, Real power) {
(
*
this
)(
i
,
j
)
=
src
.
Row
(
i
).
Range
(
j
*
group_size
,
group_size
).
Norm
(
power
);
}
template
<
typename
Real
>
void
MatrixBase
<
Real
>::
GroupMax
(
const
MatrixBase
<
Real
>
&
src
)
{
KALDI_ASSERT
(
src
.
NumCols
()
%
this
->
NumCols
()
==
0
&&
src
.
NumRows
()
==
this
->
NumRows
());
int
group_size
=
src
.
NumCols
()
/
this
->
NumCols
(),
num_rows
=
this
->
NumRows
(),
num_cols
=
this
->
NumCols
();
for
(
MatrixIndexT
i
=
0
;
i
<
num_rows
;
i
++
)
{
const
Real
*
src_row_data
=
src
.
RowData
(
i
);
for
(
MatrixIndexT
j
=
0
;
j
<
num_cols
;
j
++
)
{
Real
max_val
=
-
1e20
;
for
(
MatrixIndexT
k
=
0
;
k
<
group_size
;
k
++
)
{
Real
src_data
=
src_row_data
[
j
*
group_size
+
k
];
if
(
src_data
>
max_val
)
max_val
=
src_data
;
}
(
*
this
)(
i
,
j
)
=
max_val
;
}
}
}
template
<
typename
Real
>
void
MatrixBase
<
Real
>::
CopyCols
(
const
MatrixBase
<
Real
>
&
src
,
const
std
::
vector
<
MatrixIndexT
>
&
indices
)
{
...
...
src/matrix/kaldi-matrix.h
View file @
87337b60
...
...
@@ -426,7 +426,6 @@ class MatrixBase {
/// Requires src.NumRows() == this->NumRows() and src.NumCols() % this->NumCols() == 0.
void
GroupPnorm
(
const
MatrixBase
<
Real
>
&
src
,
Real
power
);
/// Calculate derivatives for the GroupPnorm function above...
/// if "input" is the input to the GroupPnorm function above (i.e. the "src" variable),
/// and "output" is the result of the computation (i.e. the "this" of that function
...
...
@@ -436,6 +435,18 @@ class MatrixBase {
void
GroupPnormDeriv
(
const
MatrixBase
<
Real
>
&
input
,
const
MatrixBase
<
Real
>
&
output
,
Real
power
);
/// Apply the function y(i) = (max_{j = i*G}^{(i+1)*G-1} x_j
/// Requires src.NumRows() == this->NumRows() and src.NumCols() % this->NumCols() == 0.
void
GroupMax
(
const
MatrixBase
<
Real
>
&
src
);
/// Calculate derivatives for the GroupMax function above, where
/// "input" is the input to the GroupMax function above (i.e. the "src" variable),
/// and "output" is the result of the computation (i.e. the "this" of that function
/// call), and *this must have the same dimension as "input". Each element
/// of *this will be set to 1 if the corresponding input equals the output of
/// the group, and 0 otherwise. The equals the function derivative where it is
/// defined (it's not defined where multiple inputs in the group are equal to the output).
void
GroupMaxDeriv
(
const
MatrixBase
<
Real
>
&
input
,
const
MatrixBase
<
Real
>
&
output
);
/// Set each element to the tanh of the corresponding element of "src".
void
Tanh
(
const
MatrixBase
<
Real
>
&
src
);
...
...
src/nnet2/nnet-component.cc
View file @
87337b60
...
...
@@ -451,14 +451,7 @@ void MaxoutComponent::Propagate(const ChunkInfo &in_info,
in_info
.
CheckSize
(
in
);
out_info
.
CheckSize
(
*
out
);
KALDI_ASSERT
(
in_info
.
NumChunks
()
==
out_info
.
NumChunks
());
int32
group_size
=
input_dim_
/
output_dim_
;
for
(
MatrixIndexT
j
=
0
;
j
<
output_dim_
;
j
++
)
{
CuSubMatrix
<
BaseFloat
>
pool
(
out
->
ColRange
(
j
,
1
));
pool
.
Set
(
-
1e20
);
for
(
MatrixIndexT
i
=
0
;
i
<
group_size
;
i
++
)
pool
.
Max
(
in
.
ColRange
(
j
*
group_size
+
i
,
1
));
}
out
->
GroupMax
(
in
);
}
void
MaxoutComponent
::
Backprop
(
const
ChunkInfo
&
,
// in_info,
...
...
@@ -468,26 +461,9 @@ void MaxoutComponent::Backprop(const ChunkInfo &, // in_info,
const
CuMatrixBase
<
BaseFloat
>
&
out_deriv
,
Component
*
to_update
,
CuMatrix
<
BaseFloat
>
*
in_deriv
)
const
{
int32
group_size
=
input_dim_
/
output_dim_
;
in_deriv
->
Resize
(
in_value
.
NumRows
(),
in_value
.
NumCols
(),
kSetZero
);
for
(
MatrixIndexT
j
=
0
;
j
<
output_dim_
;
j
++
)
{
CuSubMatrix
<
BaseFloat
>
out_j
(
out_value
.
ColRange
(
j
,
1
));
for
(
MatrixIndexT
i
=
0
;
i
<
group_size
;
i
++
)
{
CuSubMatrix
<
BaseFloat
>
in_i
(
in_value
.
ColRange
(
j
*
group_size
+
i
,
1
));
CuSubMatrix
<
BaseFloat
>
in_deriv_i
(
in_deriv
->
ColRange
(
j
*
group_size
+
i
,
1
));
CuMatrix
<
BaseFloat
>
out_deriv_j
(
out_deriv
.
ColRange
(
j
,
1
));
// Only the pool-inputs with 'max-values'
// are used to back-propagate into,
// the rest of derivatives is zeroed-out by a mask.
CuMatrix
<
BaseFloat
>
mask
;
in_i
.
EqualElementMask
(
out_j
,
&
mask
);
out_deriv_j
.
MulElements
(
mask
);
in_deriv_i
.
AddMat
(
1.0
,
out_deriv_j
);
}
}
in_deriv
->
GroupMaxDeriv
(
in_value
,
out_value
);
in_deriv
->
MulRowsGroupMat
(
out_deriv
);
}
void
MaxoutComponent
::
Read
(
std
::
istream
&
is
,
bool
binary
)
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment