Commit 87337b60 authored by naxingyu's avatar naxingyu
Browse files

update Maxout GPU-CPU and test code

parent 4881b72e
......@@ -80,6 +80,7 @@ void cudaF_mul_cols_vec(dim3 Gr, dim3 Bl, float *mat, const float *scale, Matrix
void cudaF_mul_rows_vec(dim3 Gr, dim3 Bl, float *mat, const float *scale, MatrixDim d);
void cudaF_mul_rows_group_mat(dim3 Gr, dim3 Bl, float *y, const float *x, MatrixDim d, int src_stride, int group_size);
void cudaF_calc_pnorm_deriv(dim3 Gr, dim3 Bl, float *y, const float *x1, const float *x2, MatrixDim d, int src_stride, int group_size, float power);
void cudaF_calc_group_max_deriv(dim3 Gr, dim3 Bl, float *y, const float *x1, const float *x2, MatrixDim d, int src_stride, int group_size);
void cudaF_div_rows_vec(dim3 Gr, dim3 Bl, float *mat, const float *vec_div, MatrixDim d);
void cudaF_add_mat(dim3 Gr, dim3 Bl, float alpha, const float *src, float *dst, MatrixDim d, int src_stride, int A_trans);
void cudaF_add_mat_mat_div_mat(dim3 Gr, dim3 Bl, const float *A, const float *B, const float *C, float *dst, MatrixDim d, int stride_a, int stride_b, int stride_c);
......@@ -131,6 +132,7 @@ void cudaF_softmax_reduce(size_t Gr, size_t Bl, float *y, const float *x, Matrix
void cudaF_log_softmax_reduce(size_t Gr, size_t Bl, float *y, const float *x, MatrixDim d, int src_stride);
void cudaF_soft_hinge(dim3 Gr, dim3 Bl, float *y, const float *x, MatrixDim d, int src_stride);
void cudaF_group_pnorm(dim3 Gr, dim3 Bl, float *y, const float *x, MatrixDim d, int src_stride, int group_size, float power);
void cudaF_group_max(dim3 Gr, dim3 Bl, float *y, const float *x, MatrixDim d, int src_stride, int group_size);
void cudaF_sigmoid(dim3 Gr, dim3 Bl, float *y, const float *x, MatrixDim d, int src_stride);
void cudaF_diff_sigmoid(dim3 Gr, dim3 Bl, float *eout, const float *e, const float *y, MatrixDim d, int e_stride, int y_stride);
void cudaF_tanh(dim3 Gr, dim3 Bl, float *y, const float *x, MatrixDim d, int src_stride);
......@@ -206,6 +208,7 @@ void cudaD_mul_cols_vec(dim3 Gr, dim3 Bl, double *mat, const double *scale, Matr
void cudaD_mul_rows_vec(dim3 Gr, dim3 Bl, double *mat, const double *scale, MatrixDim d);
void cudaD_mul_rows_group_mat(dim3 Gr, dim3 Bl, double *y, const double *x, MatrixDim d, int src_stride, int group_size);
void cudaD_calc_pnorm_deriv(dim3 Gr, dim3 Bl, double *y, const double *x1, const double *x2, MatrixDim d, int src_stride, int group_size, double power);
void cudaD_calc_group_max_deriv(dim3 Gr, dim3 Bl, double *y, const double *x1, const double *x2, MatrixDim d, int src_stride, int group_size);
void cudaD_div_rows_vec(dim3 Gr, dim3 Bl, double *mat, const double *vec_div, MatrixDim d);
void cudaD_add_mat(dim3 Gr, dim3 Bl, double alpha, const double *src, double *dst, MatrixDim d, int src_stride, int A_trans);
void cudaD_add_mat_mat_div_mat(dim3 Gr, dim3 Bl, const double *A, const double *B, const double *C, double *dst, MatrixDim d, int stride_a, int stride_b, int stride_c);
......@@ -260,6 +263,7 @@ void cudaD_softmax_reduce(size_t Gr, size_t Bl, double *y, const double *x, Matr
void cudaD_log_softmax_reduce(size_t Gr, size_t Bl, double *y, const double *x, MatrixDim d, int src_stride);
void cudaD_soft_hinge(dim3 Gr, dim3 Bl, double *y, const double *x, MatrixDim d, int src_stride);
void cudaD_group_pnorm(dim3 Gr, dim3 Bl, double *y, const double *x, MatrixDim d, int src_stride, int group_size, double power);
void cudaD_group_max(dim3 Gr, dim3 Bl, double *y, const double *x, MatrixDim d, int src_stride, int group_size);
void cudaD_sigmoid(dim3 Gr, dim3 Bl, double *y, const double *x, MatrixDim d, int src_stride);
void cudaD_diff_sigmoid(dim3 Gr, dim3 Bl, double *eout, const double *e, const double *y, MatrixDim d, int e_stride, int y_stride);
void cudaD_tanh(dim3 Gr, dim3 Bl, double *y, const double *x, MatrixDim d, int src_stride);
......
......@@ -474,6 +474,24 @@ static void _calc_pnorm_deriv(Real *deriv, const Real *vec, const Real *norm,
}
}
/// deriv is the derivative we will output; vec is the input we're computing
/// the group max on, "maxv" is the previously computed group max.
template<typename Real>
__global__
static void _calc_group_max_deriv(Real *deriv, const Real *vec, const Real *maxv,
MatrixDim d, int src_stride, int group_size) {
int i = blockIdx.x * blockDim.x + threadIdx.x;
int j = blockIdx.y * blockDim.y + threadIdx.y;
if (j < d.rows && i < d.cols ) {
int dst_index = i + j * d.stride,
src_index = i / group_size + j * src_stride;
Real vec_element = vec[dst_index], // this is the element of the original vector.
max_element = maxv[src_index]; // this is the max value
Real ans = (max_element == vec_element ? 1.0 : 0.0);
deriv[dst_index] = ans;
}
}
/// Set each element to y = (x == orig ? changed : x).
template<typename Real>
__global__
......@@ -1521,6 +1539,27 @@ static void _group_pnorm(Real *y, const Real *x, MatrixDim d, int src_stride,
}
}
}
template<typename Real>
__global__
static void _group_max(Real *y, const Real *x, MatrixDim d, int src_stride,
int group_size) {
int i = blockIdx.x * blockDim.x + threadIdx.x;
int j = blockIdx.y * blockDim.y + threadIdx.y;
if (j < d.rows && i < d.cols) {
int dst_index = i + j * d.stride;
int src_begin_index = i * group_size + j * src_stride;
Real max_value = -1e20;
int src_end_index = src_begin_index + group_size;
for (int src_index = src_begin_index; src_index < src_end_index;
src_index ++) {
if (!isnan(x[src_index]) && x[src_index] > max_value)
max_value = x[src_index];
}
y[dst_index] = max_value;
}
}
/*
* cu::
*/
......@@ -2060,6 +2099,12 @@ void cudaF_calc_pnorm_deriv(dim3 Gr, dim3 Bl, float *y, const float *x1,
_calc_pnorm_deriv<<<Gr,Bl>>>(y, x1, x2, d, src_stride, group_size, power);
}
void cudaF_calc_group_max_deriv(dim3 Gr, dim3 Bl, float *y, const float *x1,
const float *x2, MatrixDim d, int src_stride,
int group_size) {
_calc_group_max_deriv<<<Gr,Bl>>>(y, x1, x2, d, src_stride, group_size);
}
void cudaF_div_rows_vec(dim3 Gr, dim3 Bl, float* mat, const float* vec_div, MatrixDim d) {
_div_rows_vec<<<Gr,Bl>>>(mat, vec_div, d);
}
......@@ -2234,6 +2279,10 @@ void cudaF_group_pnorm(dim3 Gr, dim3 Bl, float *y, const float *x, MatrixDim d,
_group_pnorm<<<Gr,Bl>>>(y, x, d, src_stride, group_size, power);
}
void cudaF_group_max(dim3 Gr, dim3 Bl, float *y, const float *x, MatrixDim d, int src_stride, int group_size) {
_group_max<<<Gr,Bl>>>(y, x, d, src_stride, group_size);
}
void cudaF_sigmoid (dim3 Gr, dim3 Bl, float* y, const float* x, MatrixDim d, int src_stride) {
_sigmoid<<<Gr,Bl>>>(y, x, d, src_stride);
}
......@@ -2471,6 +2520,12 @@ void cudaD_calc_pnorm_deriv(dim3 Gr, dim3 Bl, double*y, const double* x1,
_calc_pnorm_deriv<<<Gr,Bl>>>(y, x1, x2, d, src_stride, group_size, power);
}
void cudaD_calc_group_max_deriv(dim3 Gr, dim3 Bl, double*y, const double* x1,
const double* x2, MatrixDim d, int src_stride,
int group_size) {
_calc_group_max_deriv<<<Gr,Bl>>>(y, x1, x2, d, src_stride, group_size);
}
void cudaD_div_rows_vec(dim3 Gr, dim3 Bl, double* mat, const double* vec_div, MatrixDim d) {
_div_rows_vec<<<Gr,Bl>>>(mat, vec_div, d);
}
......@@ -2645,6 +2700,11 @@ void cudaD_group_pnorm(dim3 Gr, dim3 Bl, double* y, const double* x, MatrixDim d
_group_pnorm<<<Gr,Bl>>>(y, x, d, src_stride, group_size, power);
}
void cudaD_group_max(dim3 Gr, dim3 Bl, double* y, const double* x, MatrixDim d,
int src_stride, int group_size) {
_group_max<<<Gr,Bl>>>(y, x, d, src_stride, group_size);
}
void cudaD_sigmoid (dim3 Gr, dim3 Bl, double* y, const double* x, MatrixDim d, int src_stride) {
_sigmoid<<<Gr,Bl>>>(y, x, d, src_stride);
}
......
......@@ -116,6 +116,7 @@ inline void cuda_mul_cols_vec(dim3 Gr, dim3 Bl, float *mat, const float *scale,
inline void cuda_mul_rows_vec(dim3 Gr, dim3 Bl, float *mat, const float *scale, MatrixDim d) { cudaF_mul_rows_vec(Gr,Bl,mat,scale,d); }
inline void cuda_mul_rows_group_mat(dim3 Gr, dim3 Bl, float *y, const float *x, MatrixDim d, int src_stride, int group_size) { cudaF_mul_rows_group_mat(Gr, Bl, y, x, d, src_stride, group_size); }
inline void cuda_calc_pnorm_deriv(dim3 Gr, dim3 Bl, float *y, const float *x1, const float *x2, MatrixDim d, int src_stride, int group_size, float power) {cudaF_calc_pnorm_deriv(Gr, Bl, y, x1, x2, d, src_stride, group_size, power); }
inline void cuda_calc_group_max_deriv(dim3 Gr, dim3 Bl, float *y, const float *x1, const float *x2, MatrixDim d, int src_stride, int group_size) {cudaF_calc_group_max_deriv(Gr, Bl, y, x1, x2, d, src_stride, group_size); }
inline void cuda_add_mat(dim3 Gr, dim3 Bl, float alpha, const float *src, float *dst, MatrixDim d, int src_stride, int A_trans) { cudaF_add_mat(Gr,Bl,alpha,src,dst,d,src_stride, A_trans); }
inline void cuda_add_mat_mat_div_mat(dim3 Gr, dim3 Bl, const float *A, const float *B, const float *C, float *dst, MatrixDim d, int stride_a, int stride_b, int stride_c) { cudaF_add_mat_mat_div_mat(Gr,Bl,A,B,C,dst,d,stride_a,stride_b,stride_c); }
inline void cuda_add_vec_to_cols(dim3 Gr, dim3 Bl, float alpha, const float *col, float beta, float *dst, MatrixDim d) { cudaF_add_vec_to_cols(Gr,Bl,alpha,col,beta,dst,d); }
......@@ -182,6 +183,7 @@ inline void cuda_block_add_mat_mat(dim3 Gr, dim3 Bl, CuBlockMatrixData *B_cu_dat
*/
inline void cuda_soft_hinge(dim3 Gr, dim3 Bl, float *y, const float *x, MatrixDim d, int src_stride) { cudaF_soft_hinge(Gr,Bl,y,x,d,src_stride); }
inline void cuda_group_pnorm(dim3 Gr, dim3 Bl, float *y, const float *x, MatrixDim d, int src_stride, int group_size, float power) { cudaF_group_pnorm(Gr, Bl, y, x, d, src_stride, group_size, power);}
inline void cuda_group_max(dim3 Gr, dim3 Bl, float *y, const float *x, MatrixDim d, int src_stride, int group_size) { cudaF_group_max(Gr, Bl, y, x, d, src_stride, group_size);}
inline void cuda_sigmoid(dim3 Gr, dim3 Bl, float *y, const float *x, MatrixDim d, int src_stride) { cudaF_sigmoid(Gr,Bl,y,x,d,src_stride); }
inline void cuda_diff_sigmoid(dim3 Gr, dim3 Bl, float *eout, const float *e, const float *y, MatrixDim d, int e_stride, int y_stride) { cudaF_diff_sigmoid(Gr,Bl,eout,e,y,d,e_stride,y_stride); }
inline void cuda_tanh(dim3 Gr, dim3 Bl, float *y, const float *x, MatrixDim d, int src_stride) { cudaF_tanh(Gr,Bl,y,x,d,src_stride); }
......@@ -281,6 +283,7 @@ inline void cuda_mul_cols_vec(dim3 Gr, dim3 Bl, double *mat, const double *scale
inline void cuda_mul_rows_vec(dim3 Gr, dim3 Bl, double *mat, const double *scale, MatrixDim d) { cudaD_mul_rows_vec(Gr,Bl,mat,scale,d); }
inline void cuda_mul_rows_group_mat(dim3 Gr, dim3 Bl, double *y, const double *x, MatrixDim d, int src_stride, int group_size) { cudaD_mul_rows_group_mat(Gr, Bl, y, x, d, src_stride, group_size); }
inline void cuda_calc_pnorm_deriv(dim3 Gr, dim3 Bl, double *y, const double *x1, const double *x2, MatrixDim d, int src_stride, int group_size, double power) {cudaD_calc_pnorm_deriv(Gr, Bl, y, x1, x2, d, src_stride, group_size, power); }
inline void cuda_calc_group_max_deriv(dim3 Gr, dim3 Bl, double *y, const double *x1, const double *x2, MatrixDim d, int src_stride, int group_size) {cudaD_calc_group_max_deriv(Gr, Bl, y, x1, x2, d, src_stride, group_size); }
inline void cuda_add_mat(dim3 Gr, dim3 Bl, double alpha, const double *src, double *dst, MatrixDim d, int src_stride, int A_trans) { cudaD_add_mat(Gr,Bl,alpha,src,dst,d,src_stride, A_trans); }
inline void cuda_add_mat_mat_div_mat(dim3 Gr, dim3 Bl, const double *A, const double *B, const double *C, double *dst, MatrixDim d, int stride_a, int stride_b, int stride_c) { cudaD_add_mat_mat_div_mat(Gr,Bl,A,B,C,dst,d,stride_a,stride_b,stride_c); }
inline void cuda_add_vec_to_cols(dim3 Gr, dim3 Bl, double alpha, const double *col, double beta, double *dst, MatrixDim d) { cudaD_add_vec_to_cols(Gr,Bl,alpha,col,beta,dst,d); }
......@@ -343,6 +346,7 @@ inline void cuda_block_add_mat_mat(dim3 Gr, dim3 Bl, CuBlockMatrixData *B_cu_dat
*/
inline void cuda_soft_hinge(dim3 Gr, dim3 Bl, double *y, const double *x, MatrixDim d, int src_stride) { cudaD_soft_hinge(Gr,Bl,y,x,d,src_stride); }
inline void cuda_group_pnorm(dim3 Gr, dim3 Bl, double *y, const double *x, MatrixDim d, int src_stride, int group_size, double power) { cudaD_group_pnorm(Gr, Bl, y, x, d, src_stride, group_size, power); }
inline void cuda_group_max(dim3 Gr, dim3 Bl, double *y, const double *x, MatrixDim d, int src_stride, int group_size) { cudaD_group_max(Gr, Bl, y, x, d, src_stride, group_size); }
inline void cuda_sigmoid(dim3 Gr, dim3 Bl, double *y, const double *x, MatrixDim d, int src_stride) { cudaD_sigmoid(Gr,Bl,y,x,d,src_stride); }
inline void cuda_diff_sigmoid(dim3 Gr, dim3 Bl, double *eout, const double *e, const double *y, MatrixDim d, int e_stride, int y_stride) { cudaD_diff_sigmoid(Gr,Bl,eout,e,y,d,e_stride,y_stride); }
inline void cuda_tanh(dim3 Gr, dim3 Bl, double *y, const double *x, MatrixDim d, int src_stride) { cudaD_tanh(Gr,Bl,y,x,d,src_stride); }
......
......@@ -311,6 +311,39 @@ template<typename Real> void TestCuMatrixGroupPnormDeriv(int32 dim) {
<< dim << ", speed was " << gflops << " gigaflops.";
}
template<typename Real> void TestCuMatrixGroupMax(int32 dim) {
BaseFloat time_in_secs = 0.025;
int32 group_size = 4;
CuMatrix<Real> M(dim, dim), N(dim, dim / group_size);
M.SetRandn();
Timer tim;
int32 iter = 0;
for (;tim.Elapsed() < time_in_secs; iter++)
N.GroupMax(M);
BaseFloat fdim = dim;
BaseFloat gflops = (fdim * fdim * iter) / (tim.Elapsed() * 1.0e+09);
KALDI_LOG << "For CuMatrix::GroupMax" << NameOf<Real>() << ", for dim = "
<< dim << ", speed was " << gflops << " gigaflops.";
}
template<typename Real> void TestCuMatrixGroupMaxDeriv(int32 dim) {
BaseFloat time_in_secs = 0.025;
int32 group_size = 4;
CuMatrix<Real> M(dim, dim), N(dim, dim / group_size), O(dim, dim);
M.SetRandn();
N.GroupMax(M);
Timer tim;
int32 iter = 0;
for (;tim.Elapsed() < time_in_secs; iter++)
O.GroupMaxDeriv(M, N);
BaseFloat fdim = dim;
BaseFloat gflops = (fdim * fdim * iter) / (tim.Elapsed() * 1.0e+09);
KALDI_LOG << "For CuMatrix::GroupMaxDeriv" << NameOf<Real>() << ", for dim = "
<< dim << ", speed was " << gflops << " gigaflops.";
}
template<typename Real> void TestCuMatrixTraceMatMat(int32 dim) {
for (int32 n = 0; n < 2; n++) {
......@@ -517,6 +550,10 @@ template<typename Real> void CudaMatrixSpeedTest() {
TestCuMatrixGroupPnorm<Real>(sizes[s]);
for (int32 s = 0; s < ns; s++)
TestCuMatrixGroupPnormDeriv<Real>(sizes[s]);
for (int32 s = 0; s < ns; s++)
TestCuMatrixGroupMax<Real>(sizes[s]);
for (int32 s = 0; s < ns; s++)
TestCuMatrixGroupMaxDeriv<Real>(sizes[s]);
for (int32 s = 0; s < ns; s++)
TestCuMatrixTraceMatMat<Real>(sizes[s]);
for (int32 s = 0; s < ns; s++)
......
......@@ -266,6 +266,27 @@ static void UnitTestCuMatrixGroupPnorm() {
}
}
template<typename Real>
static void UnitTestCuMatrixGroupMax() {
int32 M = 100 + Rand() % 200, N = 100 + Rand() % 200;
// M = 256; N = 256;
for (int32 K = 5; K < 7; K++) {
int32 N_src = N * K;
Matrix<Real> H_src(M, N_src);
H_src.SetRandn();
if (rand () % 2 == 0)
H_src.ApplyFloor(0.0); // will put some zeros in the matrix.. harder to
// do derivatives.
Matrix<Real> H(M, N);
H.GroupMax(H_src);
CuMatrix<Real> D(H_src);
CuMatrix<Real> E(M, N);
E.GroupMax(D);
Matrix<Real> H2(E);
AssertEqual(H,H2);
}
}
template<typename Real>
static void UnitTestCuMatrixSet() {
for (int32 i = 0; i < 2; i++) {
......@@ -686,6 +707,39 @@ static void UnitTestCuMatrixGroupPnormDeriv() {
AssertEqual(Hr,Hr2);
}
template<typename Real>
static void UnitTestCuMatrixGroupMaxDeriv() {
int32 dimM = 100 + Rand() % 200, dimNs = 100 + Rand() % 200;
int32 group_size = 1 + Rand() % 10;
// int32 dimM = 256, dimNs = 2;
// int32 group_size = 2;
int32 dimN = group_size * dimNs;
Matrix<Real> Hm(dimM, dimN);
Matrix<Real> Hr(dimM, dimN);
Matrix<Real> Hs(dimM, dimNs);
Hs.SetRandn();
if (rand () % 2 == 0)
Hm.ApplyFloor(0.0); // will put some zeros in the matrix.. harder to
// do derivatives.
Hs.GroupMax(Hm);
CuMatrix<Real> Dm(dimM, dimN);
CuMatrix<Real> Dr(dimM, dimN);
CuMatrix<Real> Ds(dimM, dimNs);
Dm.CopyFromMat(Hm);
Dr.CopyFromMat(Hr);
Ds.CopyFromMat(Hs);
// KALDI_LOG << "Hr " << Hr << " Dr " << Dr << "Ds" << Ds << " Hs " << Hs ;
Dr.GroupMaxDeriv(Dm, Ds);
Hr.GroupMaxDeriv(Hm, Hs);
// KALDI_LOG << "Hr " << Hr << " Dr " << Dr << "Ds" << Ds << " Hs " << Hs ;
Matrix<Real> Hr2(dimM, dimN);
Dr.CopyToMat(&Hr2);
AssertEqual(Hr,Hr2);
}
template<typename Real> static void UnitTestCuMatrixAddDiagVecMat() {
for (int p = 0; p < 4; p++) {
MatrixIndexT dimM = 100 + Rand() % 255, dimN = 100 + Rand() % 255;
......@@ -2072,6 +2126,8 @@ template<typename Real> void CudaMatrixUnitTest() {
UnitTestCuDiffSigmoid<Real>();
UnitTestCuMatrixGroupPnorm<Real>();
UnitTestCuMatrixGroupPnormDeriv<Real>();
UnitTestCuMatrixGroupMax<Real>();
UnitTestCuMatrixGroupMaxDeriv<Real>();
UnitTestCuMatrixMulRowsVec<Real>();
UnitTestCuMatrixMulRowsGroupMat<Real>();
UnitTestCuFindRowMaxId<Real>();
......
......@@ -759,6 +759,32 @@ void CuMatrixBase<Real>::GroupPnormDeriv(const CuMatrixBase<Real> &src1,
}
}
template<typename Real>
void CuMatrixBase<Real>::GroupMaxDeriv(const CuMatrixBase<Real> &src1,
const CuMatrixBase<Real> &src2) {
KALDI_ASSERT(src2.NumCols() > 0);
int group_size = this->NumCols() / src2.NumCols();
KALDI_ASSERT(this->NumCols() == src2.NumCols() * group_size);
#if HAVE_CUDA == 1
if (CuDevice::Instantiate().Enabled()) {
Timer tim;
dim3 dimBlock(CU2DBLOCK, CU2DBLOCK);
dim3 dimGrid(n_blocks(NumCols(), CU2DBLOCK), n_blocks(NumRows(), CU2DBLOCK));
cuda_calc_group_max_deriv(dimGrid, dimBlock, this->data_,
src1.Data(), src2.Data(), Dim(),
src2.Stride(), group_size);
CU_SAFE_CALL(cudaGetLastError());
CuDevice::Instantiate().AccuProfile(__func__, tim.Elapsed());
} else
#endif
{
Mat().GroupMaxDeriv(src1.Mat(), src2.Mat());
}
}
template<typename Real>
void CuMatrixBase<Real>::DivRowsVec(const CuVectorBase<Real> &div) {
#if HAVE_CUDA == 1
......@@ -1137,6 +1163,27 @@ void CuMatrixBase<Real>::GroupPnorm(const CuMatrixBase<Real> &src, Real power) {
}
}
template<typename Real>
void CuMatrixBase<Real>::GroupMax(const CuMatrixBase<Real> &src) {
int group_size = src.NumCols() / this->NumCols();
KALDI_ASSERT(src.NumCols() == this->NumCols() * group_size &&
this->NumRows() == src.NumRows());
#if HAVE_CUDA == 1
if (CuDevice::Instantiate().Enabled()) {
Timer tim;
dim3 dimBlock(CU2DBLOCK, CU2DBLOCK);
dim3 dimGrid(n_blocks(src.NumCols(), CU2DBLOCK), n_blocks(src.NumRows(), CU2DBLOCK));
cuda_group_max(dimGrid, dimBlock, this->data_, src.data_,
this->Dim(), src.Stride(), group_size);
CU_SAFE_CALL(cudaGetLastError());
CuDevice::Instantiate().AccuProfile(__func__, tim.Elapsed());
} else
#endif
{
Mat().GroupMax(src.Mat());
}
}
/*
Think of sv_labels as a Matrix, denoting the "correct" label of each frame to
each phone-state; it's very likely to contain a LOT of zeros
......
......@@ -197,6 +197,22 @@ class CuMatrixBase {
/// "output-elem" is whichever element of output depends on that input element.
void GroupPnormDeriv(const CuMatrixBase<Real> &input,
const CuMatrixBase<Real> &output, Real power);
/// Apply the function y(i) = (max_{j = i*G}^{(i+1)*G-1} x_j
/// where G = x.NumCols() / y.NumCols() must be an integer.
/// [note: y corresponds to *this and x to src, so
/// src.NumCols() / this->NumCols() must be an integer.
void GroupMax(const CuMatrixBase<Real> &src);
/// Calculate derivatives for the GroupMax function above, where
/// "input" is the input to the GroupMax function above (i.e. the "src" variable),
/// and "output" is the result of the computation (i.e. the "this" of that function
/// call), and *this must have the same dimension as "input". Each element
/// of *this will be set to 1 if the corresponding input equals the output of
/// the group, and 0 otherwise. The equals the function derivative where it is
/// defined (it's not defined where multiple inputs in the group are equal to the output).
void GroupMaxDeriv(const CuMatrixBase<Real> &input,
const CuMatrixBase<Real> &output);
/// Compute the hyperbolic tangent (tanh) function; element by element,
/// *this = tanh(src).
......
......@@ -1110,6 +1110,26 @@ void MatrixBase<Real>::GroupPnormDeriv(const MatrixBase<Real> &input,
}
}
template<typename Real>
void MatrixBase<Real>::GroupMaxDeriv(const MatrixBase<Real> &input,
const MatrixBase<Real> &output) {
KALDI_ASSERT(input.NumCols() == this->NumCols() &&
input.NumRows() == this->NumRows());
KALDI_ASSERT(this->NumCols() % output.NumCols() == 0 &&
this->NumRows() == output.NumRows());
int group_size = this->NumCols() / output.NumCols(),
num_rows = this->NumRows(), num_cols = this->NumCols();
for (MatrixIndexT i = 0; i < num_rows; i++) {
for (MatrixIndexT j = 0; j < num_cols; j++) {
Real input_val = input(i, j);
Real output_val = output(i, j / group_size);
(*this)(i, j) = (input_val == output_val ? 1 : 0);
}
}
}
template<typename Real> // scales each column by scale[i].
void MatrixBase<Real>::MulColsVec(const VectorBase<Real> &scale) {
KALDI_ASSERT(scale.Dim() == num_cols_);
......@@ -2499,6 +2519,26 @@ void MatrixBase<Real>::GroupPnorm(const MatrixBase<Real> &src, Real power) {
(*this)(i, j) = src.Row(i).Range(j * group_size, group_size).Norm(power);
}
template<typename Real>
void MatrixBase<Real>::GroupMax(const MatrixBase<Real> &src) {
KALDI_ASSERT(src.NumCols() % this->NumCols() == 0 &&
src.NumRows() == this->NumRows());
int group_size = src.NumCols() / this->NumCols(),
num_rows = this->NumRows(), num_cols = this->NumCols();
for (MatrixIndexT i = 0; i < num_rows; i++) {
const Real *src_row_data = src.RowData(i);
for (MatrixIndexT j = 0; j < num_cols; j++) {
Real max_val = -1e20;
for (MatrixIndexT k = 0; k < group_size; k++) {
Real src_data = src_row_data[j * group_size + k];
if (src_data > max_val)
max_val = src_data;
}
(*this)(i, j) = max_val;
}
}
}
template<typename Real>
void MatrixBase<Real>::CopyCols(const MatrixBase<Real> &src,
const std::vector<MatrixIndexT> &indices) {
......
......@@ -426,7 +426,6 @@ class MatrixBase {
/// Requires src.NumRows() == this->NumRows() and src.NumCols() % this->NumCols() == 0.
void GroupPnorm(const MatrixBase<Real> &src, Real power);
/// Calculate derivatives for the GroupPnorm function above...
/// if "input" is the input to the GroupPnorm function above (i.e. the "src" variable),
/// and "output" is the result of the computation (i.e. the "this" of that function
......@@ -436,6 +435,18 @@ class MatrixBase {
void GroupPnormDeriv(const MatrixBase<Real> &input, const MatrixBase<Real> &output,
Real power);
/// Apply the function y(i) = (max_{j = i*G}^{(i+1)*G-1} x_j
/// Requires src.NumRows() == this->NumRows() and src.NumCols() % this->NumCols() == 0.
void GroupMax(const MatrixBase<Real> &src);
/// Calculate derivatives for the GroupMax function above, where
/// "input" is the input to the GroupMax function above (i.e. the "src" variable),
/// and "output" is the result of the computation (i.e. the "this" of that function
/// call), and *this must have the same dimension as "input". Each element
/// of *this will be set to 1 if the corresponding input equals the output of
/// the group, and 0 otherwise. The equals the function derivative where it is
/// defined (it's not defined where multiple inputs in the group are equal to the output).
void GroupMaxDeriv(const MatrixBase<Real> &input, const MatrixBase<Real> &output);
/// Set each element to the tanh of the corresponding element of "src".
void Tanh(const MatrixBase<Real> &src);
......
......@@ -451,14 +451,7 @@ void MaxoutComponent::Propagate(const ChunkInfo &in_info,
in_info.CheckSize(in);
out_info.CheckSize(*out);
KALDI_ASSERT(in_info.NumChunks() == out_info.NumChunks());
int32 group_size = input_dim_ / output_dim_;
for (MatrixIndexT j = 0; j < output_dim_; j++) {
CuSubMatrix<BaseFloat> pool(out->ColRange(j, 1));
pool.Set(-1e20);
for (MatrixIndexT i = 0; i < group_size; i++)
pool.Max(in.ColRange(j * group_size + i, 1));
}
out->GroupMax(in);
}
void MaxoutComponent::Backprop(const ChunkInfo &, // in_info,
......@@ -468,26 +461,9 @@ void MaxoutComponent::Backprop(const ChunkInfo &, // in_info,
const CuMatrixBase<BaseFloat> &out_deriv,
Component *to_update,
CuMatrix<BaseFloat> *in_deriv) const {
int32 group_size = input_dim_ / output_dim_;
in_deriv->Resize(in_value.NumRows(), in_value.NumCols(), kSetZero);
for (MatrixIndexT j = 0; j < output_dim_; j++) {
CuSubMatrix<BaseFloat> out_j(out_value.ColRange(j, 1));
for (MatrixIndexT i = 0; i < group_size; i++) {
CuSubMatrix<BaseFloat> in_i(
in_value.ColRange(j * group_size + i, 1));
CuSubMatrix<BaseFloat> in_deriv_i(
in_deriv->ColRange(j * group_size + i, 1));
CuMatrix<BaseFloat> out_deriv_j(out_deriv.ColRange(j, 1));
// Only the pool-inputs with 'max-values'
// are used to back-propagate into,
// the rest of derivatives is zeroed-out by a mask.
CuMatrix<BaseFloat> mask;
in_i.EqualElementMask(out_j, &mask);
out_deriv_j.MulElements(mask);
in_deriv_i.AddMat(1.0, out_deriv_j);
}
}
in_deriv->GroupMaxDeriv(in_value, out_value);
in_deriv->MulRowsGroupMat(out_deriv);
}
void MaxoutComponent::Read(std::istream &is, bool binary) {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment