Commit 1afc4b3c authored by Guoguo Chen's avatar Guoguo Chen

trunk: adding LogSoftmax component to nnet2 setup

git-svn-id: https://svn.code.sf.net/p/kaldi/code/trunk@5156 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
parent d772b2a9
......@@ -3,6 +3,7 @@
// Copyright 2009-2012 Karel Vesely
// 2013 Lucas Ondel
// 2013 Johns Hopkins University (author: Daniel Povey)
// 2015 Guoguo Chen
// See ../../COPYING for clarification regarding multiple authors
//
......@@ -544,7 +545,7 @@ CuDevice::CuDevice(): active_gpu_id_(-1), verbose_(true)
CuDevice::~CuDevice() {
if (Enabled()) {
CU_SAFE_CALL(cublasShutdown());
cublasShutdown();
}
}
......
......@@ -4,7 +4,7 @@
// 2013 Johns Hopkins University (author: Daniel Povey)
// 2013 Hainan Xu
// 2013 Xiaohui Zhang
// 2013 Johns Hopkins University (author: Guoguo Chen)
// 2013-2015 Guoguo Chen
// See ../../COPYING for clarification regarding multiple authors
//
......@@ -128,6 +128,7 @@ void cudaF_block_add_mat_mat(dim3 Gr, dim3 Bl, CuBlockMatrixData *B_cu_data, int
* cu::
*/
void cudaF_softmax_reduce(size_t Gr, size_t Bl, float *y, const float *x, MatrixDim d, int src_stride);
void cudaF_log_softmax_reduce(size_t Gr, size_t Bl, float *y, const float *x, MatrixDim d, int src_stride);
void cudaF_soft_hinge(dim3 Gr, dim3 Bl, float *y, const float *x, MatrixDim d, int src_stride);
void cudaF_group_pnorm(dim3 Gr, dim3 Bl, float *y, const float *x, MatrixDim d, int src_stride, int group_size, float power);
void cudaF_sigmoid(dim3 Gr, dim3 Bl, float *y, const float *x, MatrixDim d, int src_stride);
......@@ -256,6 +257,7 @@ void cudaD_block_add_mat_mat(dim3 Gr, dim3 Bl, CuBlockMatrixData *B_cu_data, int
* cu::
*/
void cudaD_softmax_reduce(size_t Gr, size_t Bl, double *y, const double *x, MatrixDim d, int src_stride);
void cudaD_log_softmax_reduce(size_t Gr, size_t Bl, double *y, const double *x, MatrixDim d, int src_stride);
void cudaD_soft_hinge(dim3 Gr, dim3 Bl, double *y, const double *x, MatrixDim d, int src_stride);
void cudaD_group_pnorm(dim3 Gr, dim3 Bl, double *y, const double *x, MatrixDim d, int src_stride, int group_size, double power);
void cudaD_sigmoid(dim3 Gr, dim3 Bl, double *y, const double *x, MatrixDim d, int src_stride);
......
......@@ -5,7 +5,7 @@
// 2013 Johns Hopkins University (author: Daniel Povey)
// 2013 Hainan Xu
// 2013 Xiaohui Zhang
// 2013 Johns Hopkins University (author: Guoguo Chen)
// 2013-2015 Guoguo Chen
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
......@@ -1648,6 +1648,77 @@ static void _softmax_reduce(Real*y, const Real*x, MatrixDim d, int src_stride) {
}
template<typename Real>
__global__
static void _log_softmax_reduce(Real *y, const Real *x,
MatrixDim d, int src_stride) {
int j = blockIdx.x;
int THREADS = blockDim.x;
if (j >= d.rows) return;
__shared__ Real aux[CU1DBLOCK];
int steps = (d.cols - 1) / THREADS + 1;
// Maximum step 1: loads input data into <aux>. If <d.cols> is larger than
// <blockDim.x>, we do a first pass over the row and keep only a
// <blockDim.x>-sized array of running maxima.
aux[threadIdx.x] = x[threadIdx.x + j * d.stride];
for (int i = 1; i < steps; ++i) {
if (threadIdx.x + i * THREADS < d.cols
&& aux[threadIdx.x] < x[threadIdx.x + i * THREADS + j * d.stride])
aux[threadIdx.x] = x[threadIdx.x + i * THREADS + j * d.stride];
}
// Maximum step 2: the standard max reduce.
int nTotalThreads = THREADS;
__syncthreads();
while (nTotalThreads > 1) {
int halfPoint = ((1 + nTotalThreads) >> 1);
if (threadIdx.x < halfPoint) {
if (threadIdx.x + halfPoint < nTotalThreads
&& aux[threadIdx.x] < aux[threadIdx.x + halfPoint])
aux[threadIdx.x] = aux[threadIdx.x + halfPoint];
}
__syncthreads();
nTotalThreads = ((1 + nTotalThreads) >> 1);
}
Real max = aux[0];
__syncthreads();
// Log sum step 1: subtracts the max and takes exponentials.
y[threadIdx.x + j * d.stride] = x[threadIdx.x + j * d.stride] - max;
aux[threadIdx.x] = exp(y[threadIdx.x + j * d.stride]);
for (int i = 1; i < steps; ++i) {
if (threadIdx.x + i * THREADS < d.cols) {
y[threadIdx.x + i * THREADS + j * d.stride] =
x[threadIdx.x + i * THREADS + j * d.stride] - max;
aux[threadIdx.x] += exp(y[threadIdx.x + i * THREADS + j * d.stride]);
}
}
// Log sum step 2: computes the summation and then takes the logarithm.
nTotalThreads = THREADS;
__syncthreads();
while (nTotalThreads > 1) {
int halfPoint = ((1 + nTotalThreads) >> 1);
if (threadIdx.x < halfPoint) {
if (threadIdx.x + halfPoint < nTotalThreads)
aux[threadIdx.x] += aux[threadIdx.x + halfPoint];
}
__syncthreads();
nTotalThreads = ((1 + nTotalThreads) >> 1);
}
Real log_sum = log(aux[0]);
__syncthreads();
// Computes log softmax.
for (int i = 0; i < steps; ++i) {
if (threadIdx.x + i * THREADS < d.cols) {
y[threadIdx.x + i * THREADS + j * d.stride] -= log_sum;
}
}
}
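For reference, the kernel above computes a numerically stable log-softmax per row in three phases: a shared-memory max reduction, accumulation of exp(x_i - max), and a final subtraction of the log of that sum. A minimal single-threaded C++ sketch of the same computation (a hypothetical helper, not part of this commit) is:

#include <algorithm>
#include <cmath>
#include <vector>

// Row-wise numerically stable log-softmax:
//   y_i = (x_i - max) - log(sum_j exp(x_j - max)),
// which equals x_i - log(sum_j exp(x_j)) but cannot overflow in exp().
void LogSoftmaxRow(const std::vector<double> &x, std::vector<double> *y) {
  double max = *std::max_element(x.begin(), x.end());  // phase 1: max reduce
  double sum = 0.0;
  y->resize(x.size());
  for (size_t i = 0; i < x.size(); ++i) {
    (*y)[i] = x[i] - max;       // phase 2: subtract the max
    sum += std::exp((*y)[i]);   //          and accumulate exponentials
  }
  double log_sum = std::log(sum);
  for (size_t i = 0; i < x.size(); ++i)
    (*y)[i] -= log_sum;         // phase 3: subtract the log sum
}

Subtracting the maximum first keeps every exponent non-positive, so the exponentials stay in range and the summation is well conditioned.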
template<typename Real>
__global__
......@@ -2183,6 +2254,9 @@ void cudaF_softmax_reduce (size_t Gr, size_t Bl, float* y, const float* x, Matri
_softmax_reduce<<<Gr,Bl>>>(y, x, d, src_stride);
}
void cudaF_log_softmax_reduce (size_t Gr, size_t Bl, float* y, const float* x, MatrixDim d, int src_stride) {
_log_softmax_reduce<<<Gr,Bl>>>(y, x, d, src_stride);
}
void cudaF_splice(dim3 Gr, dim3 Bl, float* y, const float* x, const int32_cuda* off, MatrixDim d_out, MatrixDim d_in) {
_splice<<<Gr,Bl>>>(y,x,off,d_out,d_in);
......@@ -2591,6 +2665,10 @@ void cudaD_softmax_reduce (size_t Gr, size_t Bl, double* y, const double* x, Mat
_softmax_reduce<<<Gr,Bl>>>(y, x, d, src_stride);
}
void cudaD_log_softmax_reduce (size_t Gr, size_t Bl, double* y, const double* x, MatrixDim d, int src_stride) {
_log_softmax_reduce<<<Gr,Bl>>>(y, x, d, src_stride);
}
void cudaD_splice(dim3 Gr, dim3 Bl, double* y, const double* x, const int32_cuda* off, MatrixDim d_out, MatrixDim d_in) {
_splice<<<Gr,Bl>>>(y,x,off,d_out,d_in);
}
......
......@@ -5,7 +5,7 @@
// 2014 Johns Hopkins University (author: Daniel Povey)
// 2013 Hainan Xu
// 2013 Xiaohui Zhang
// 2013 Johns Hopkins University (author: Guoguo Chen)
// 2013-2015 Guoguo Chen
// See ../../COPYING for clarification regarding multiple authors
//
......@@ -191,6 +191,7 @@ Bl: dimBlock value is fixed min(d.col, CU1DBLOCK), represent CU1DBLOCK threads r
Gr: the number of rows
*/
inline void cuda_softmax_reduce(size_t Gr, size_t Bl, float *y, const float *x, MatrixDim d, int src_stride) { cudaF_softmax_reduce(Gr,Bl,y,x,d,src_stride); }
inline void cuda_log_softmax_reduce(size_t Gr, size_t Bl, float *y, const float *x, MatrixDim d, int src_stride) { cudaF_log_softmax_reduce(Gr,Bl,y,x,d,src_stride); }
inline void cuda_regularize_l1(dim3 Gr, dim3 Bl, float *wei, float *grad, float l1, float lr, MatrixDim d, int stride_grad) { cudaF_regularize_l1(Gr,Bl,wei,grad,l1,lr,d,stride_grad); }
inline void cuda_find_row_max_id(dim3 Gr, dim3 Bl, const float *mat, float *vec_val, int32_cuda *vec_id, int32_cuda voff, MatrixDim d) { cudaF_find_row_max_id(Gr,Bl,mat,vec_val,vec_id,voff,d); }
......@@ -347,6 +348,7 @@ inline void cuda_diff_sigmoid(dim3 Gr, dim3 Bl, double *eout, const double *e, c
inline void cuda_tanh(dim3 Gr, dim3 Bl, double *y, const double *x, MatrixDim d, int src_stride) { cudaD_tanh(Gr,Bl,y,x,d,src_stride); }
inline void cuda_diff_tanh(dim3 Gr, dim3 Bl, double *eout, const double *e, const double *y, MatrixDim d, int e_stride, int y_stride) { cudaD_diff_tanh(Gr,Bl,eout,e,y,d,e_stride,y_stride); }
inline void cuda_softmax_reduce(size_t Gr, size_t Bl, double *y, const double *x, MatrixDim d, int src_stride) { cudaD_softmax_reduce(Gr,Bl,y,x,d,src_stride); }
inline void cuda_log_softmax_reduce(size_t Gr, size_t Bl, double *y, const double *x, MatrixDim d, int src_stride) { cudaD_log_softmax_reduce(Gr,Bl,y,x,d,src_stride); }
inline void cuda_regularize_l1(dim3 Gr, dim3 Bl, double *wei, double *grad, double l1, double lr, MatrixDim d, int stride_grad) { cudaD_regularize_l1(Gr,Bl,wei,grad,l1,lr,d,stride_grad); }
inline void cuda_find_row_max_id(dim3 Gr, dim3 Bl, const double *mat, double *vec_val, int32_cuda *vec_id, int32_cuda voff, MatrixDim d) { cudaD_find_row_max_id(Gr,Bl,mat,vec_val,vec_id,voff,d); }
......
// cudamatrix/cu-matrix-speed-test.cc
// Copyright 2013 Johns Hopkins University (author: Daniel Povey)
// 2015 Guoguo Chen
// See ../../COPYING for clarification regarding multiple authors
//
......@@ -258,6 +259,24 @@ template<typename Real> void TestCuMatrixSoftmax(int32 dim) {
}
template<typename Real> void TestCuMatrixLogSoftmax(int32 dim) {
BaseFloat time_in_secs = 0.025;
CuMatrix<Real> M(dim, dim), N(dim, dim);
M.SetRandn();
N.SetRandn();
Timer tim;
int32 iter = 0;
for (;tim.Elapsed() < time_in_secs; iter++) {
N.ApplyLogSoftMaxPerRow(M);
}
BaseFloat fdim = dim;
BaseFloat gflops = (fdim * fdim * iter) / (tim.Elapsed() * 1.0e+09);
KALDI_LOG << "For CuMatrix::LogSoftmax" << NameOf<Real>() << ", for dim = "
<< dim << ", speed was " << gflops << " gigaflops.";
}
template<typename Real> void TestCuMatrixGroupPnorm(int32 dim) {
BaseFloat time_in_secs = 0.025;
int32 group_size = 4;
......@@ -492,6 +511,8 @@ template<typename Real> void CudaMatrixSpeedTest() {
TestCuMatrixMulRowsGroupMat<Real>(sizes[s]);
for (int32 s = 0; s < ns; s++)
TestCuMatrixSoftmax<Real>(sizes[s]);
for (int32 s = 0; s < ns; s++)
TestCuMatrixLogSoftmax<Real>(sizes[s]);
for (int32 s = 0; s < ns; s++)
TestCuMatrixGroupPnorm<Real>(sizes[s]);
for (int32 s = 0; s < ns; s++)
......
......@@ -1532,6 +1532,36 @@ static void UnitTestCuSoftmax() {
}
template<typename Real>
static void UnitTestCuLogSoftmax() {
for (int32 i = 0; i < 2; i++) {
int row = 10 + Rand() % 40;
int col = 10 + Rand() % 50;
Matrix<Real> Hi(row, col);
Matrix<Real> Ho(row, col);
Hi.SetRandn();
Hi.Scale(5.0);
CuMatrix<Real> Di(row, col);
CuMatrix<Real> Do(row, col);
Di.CopyFromMat(Hi);
// GPU.
Do.ApplyLogSoftMaxPerRow(Di);
// CPU.
Ho.CopyFromMat(Hi);
for (MatrixIndexT r = 0; r < Ho.NumRows(); r++) {
Ho.Row(r).ApplyLogSoftMax();
}
Matrix<Real> Ho2(Do);
AssertEqual(Ho, Ho2, 0.00001);
}
}
template<typename Real>
static void UnitTestCuFindRowMaxId() {
......@@ -2046,6 +2076,7 @@ template<typename Real> void CudaMatrixUnitTest() {
UnitTestCuMatrixMulRowsGroupMat<Real>();
UnitTestCuFindRowMaxId<Real>();
UnitTestCuSoftmax<Real>();
UnitTestCuLogSoftmax<Real>();
UnitTestCuDiffXent<Real>();
UnitTestCheck<Real>();
UnitTestSwapCu2Cu<Real>();
......
......@@ -5,7 +5,7 @@
// 2013 Johns Hopkins University (author: Daniel Povey)
// 2013 Hainan Xu
// 2013 Xiaohui Zhang
// 2013 Johns Hopkins University (author: Guoguo Chen)
// 2013-2015 Guoguo Chen
// See ../../COPYING for clarification regarding multiple authors
//
......@@ -1223,6 +1223,30 @@ void CuMatrixBase<Real>::ApplySoftMaxPerRow(const CuMatrixBase<Real> &src) {
}
}
template<typename Real> // Y->this, X->src
void CuMatrixBase<Real>::ApplyLogSoftMaxPerRow(const CuMatrixBase<Real> &src) {
KALDI_ASSERT(SameDim(*this, src));
#if HAVE_CUDA == 1
if (CuDevice::Instantiate().Enabled()) {
Timer tim;
size_t dimBlock = src.num_cols_ > CU1DBLOCK ? CU1DBLOCK : src.num_cols_;
size_t dimGrid = src.num_rows_;
cuda_log_softmax_reduce(dimGrid, dimBlock,
data_, src.data_, Dim(), src.Stride());
CU_SAFE_CALL(cudaGetLastError());
CuDevice::Instantiate().AccuProfile(__func__, tim.Elapsed());
} else
#endif
{
MatrixBase<Real> &mat(this->Mat());
mat.CopyFromMat(src.Mat());
for (MatrixIndexT r = 0; r < mat.NumRows(); r++) {
mat.Row(r).ApplyLogSoftMax();
}
}
}
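Usage follows the same pattern as ApplySoftMaxPerRow; a minimal sketch, assuming a built Kaldi tree (the matrix sizes here are arbitrary):

#include "cudamatrix/cu-matrix.h"

int main() {
  using namespace kaldi;
  // Row-wise log-softmax; runs on the GPU if one is active, otherwise on the CPU.
  CuMatrix<BaseFloat> in(128, 40), out(128, 40);
  in.SetRandn();
  out.ApplyLogSoftMaxPerRow(in);  // exp of each row of 'out' now sums to one
  return 0;
}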
// DiffSigmoid(Ein, Y, Eout) -> Eout.DiffSigmoid(Y, Ein).
template<typename Real> // Eout -> *this, Ein -> diff, Y -> value
void CuMatrixBase<Real>::DiffSigmoid(const CuMatrixBase<Real> &value,
......
......@@ -4,7 +4,7 @@
// 2013 Johns Hopkins University (author: Daniel Povey)
// 2013 Hainan Xu
// 2013 Xiaohui Zhang
// 2013 Johns Hopkins University (author: Guoguo Chen)
// 2013-2015 Guoguo Chen
// See ../../COPYING for clarification regarding multiple authors
//
......@@ -253,6 +253,11 @@ class CuMatrixBase {
/// for each row, the max value is first subtracted for good numerical stability
void ApplySoftMaxPerRow(const CuMatrixBase<Real> &src);
/// LogSoftmax nonlinearity
/// Y = LogSoftmax(X) : Yij = Xij - log(sum_k(e^Xik)), done to each row
/// for each row, the max value is first subtracted for good numerical stability
void ApplyLogSoftMaxPerRow(const CuMatrixBase<Real> &src);
/// Find the id of the maximal element for each row
void FindRowMaxId(CuArray<int32> *id) const;
......
......@@ -4,6 +4,7 @@
// Saarland University; Go Vivace Inc.; Ariya Rastrow;
// Petr Schwarz; Yanmin Qian; Jan Silovsky;
// Haihua Xu; Wei Shi
// 2015 Guoguo Chen
// See ../../COPYING for clarification regarding multiple authors
......@@ -845,6 +846,17 @@ Real VectorBase<Real>::ApplySoftMax() {
return max + Log(sum);
}
template<typename Real>
Real VectorBase<Real>::ApplyLogSoftMax() {
Real max = this->Max(), sum = 0.0;
for (MatrixIndexT i = 0; i < dim_; i++) {
sum += Exp((data_[i] -= max));
}
sum = Log(sum);
this->Add(-1.0 * sum);
return max + sum;
}
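The return value is the log normalizer log(sum_i exp(x_i)), the same quantity LogSumExp() computes, and after the call the exponentials of the elements sum to one. A small self-contained check of both properties (plain C++ with a hypothetical stand-in, independent of Kaldi):

#include <algorithm>
#include <cassert>
#include <cmath>

// Stand-in for VectorBase::ApplyLogSoftMax on a raw array: leaves the
// log-softmax values in x and returns the log normalizer.
double ApplyLogSoftMax(double *x, int n) {
  double max = *std::max_element(x, x + n), sum = 0.0;
  for (int i = 0; i < n; i++) sum += std::exp(x[i] -= max);
  double log_sum = std::log(sum);
  for (int i = 0; i < n; i++) x[i] -= log_sum;
  return max + log_sum;
}

int main() {
  double x[3] = {1.0, 2.0, 3.0};
  double c = ApplyLogSoftMax(x, 3);
  double s = std::exp(x[0]) + std::exp(x[1]) + std::exp(x[2]);
  assert(std::fabs(s - 1.0) < 1e-12);            // exp of outputs sums to one
  assert(std::fabs(c - 3.4076059644) < 1e-8);    // log(e^1 + e^2 + e^3)
  return 0;
}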
#ifdef HAVE_MKL
template<>
void VectorBase<float>::Tanh(const VectorBase<float> &src) {
......
......@@ -5,6 +5,7 @@
// Ariya Rastrow; Petr Schwarz; Yanmin Qian;
// Karel Vesely; Go Vivace Inc.; Arnab Ghoshal
// Wei Shi;
// 2015 Guoguo Chen
// See ../../COPYING for clarification regarding multiple authors
//
......@@ -142,6 +143,11 @@ class VectorBase {
/// This is the same as: \f$ x(i) = exp(x(i)) / \sum_i exp(x(i)) \f$
Real ApplySoftMax();
/// Applies log soft-max to vector and returns normalizer (log sum of
/// exponentials).
/// This is the same as: \f$ x(i) = x(i) - log(\sum_i exp(x(i))) \f$
Real ApplyLogSoftMax();
/// Sets each element of *this to the tanh of the corresponding element of "src".
void Tanh(const VectorBase<Real> &src);
......
......@@ -5,6 +5,7 @@
// Go Vivace Inc.; Yanmin Qian; Jan Silovsky;
// Johns Hopkins University (Author: Daniel Povey);
// Haihua Xu; Wei Shi
// 2015 Guoguo Chen
// See ../../COPYING for clarification regarding multiple authors
//
......@@ -602,9 +603,16 @@ static void UnitTestSimpleForVec() { // testing some simple operators on vectors
Real a = V.LogSumExp();
V2.Set(exp(V.LogSumExp()));
V1.DivElements(V2);
V2.CopyFromVec(V);
Real b = V.ApplySoftMax();  // b is the log normalizer, should equal a.
AssertEqual(V1, V);
AssertEqual(a, b);
V.ApplyLog();  // V now holds the log softmax of the original vector.
Real c = V2.ApplyLogSoftMax();  // should match both the values and the normalizer.
AssertEqual(V2, V);
AssertEqual(a, c);
}
for (MatrixIndexT i = 0; i < 5; i++) {
......
// nnet2/nnet-component-test.cc
// Copyright 2012-2014 Johns Hopkins University (author: Daniel Povey)
// 2015 Guoguo Chen
// See ../../COPYING for clarification regarding multiple authors
//
......@@ -805,6 +806,7 @@ int main() {
UnitTestGenericComponent<PowerComponent>("power=1.0");
UnitTestGenericComponent<PermuteComponent>();
UnitTestGenericComponent<SoftmaxComponent>();
UnitTestGenericComponent<LogSoftmaxComponent>();
UnitTestGenericComponent<RectifiedLinearComponent>();
UnitTestGenericComponent<SoftHingeComponent>();
UnitTestSpliceComponent();
......
......@@ -2,7 +2,7 @@
// 2013-2014 Johns Hopkins University (author: Daniel Povey)
// 2013 Xiaohui Zhang
// 2014 Vijayaditya Peddinti
// 2014 Guoguo Chen
// 2014-2015 Guoguo Chen
// See ../../COPYING for clarification regarding multiple authors
//
......@@ -56,6 +56,8 @@ Component* Component::NewComponentOfType(const std::string &component_type) {
ans = new PowerComponent();
} else if (component_type == "SoftmaxComponent") {
ans = new SoftmaxComponent();
} else if (component_type == "LogSoftmaxComponent") {
ans = new LogSoftmaxComponent();
} else if (component_type == "RectifiedLinearComponent") {
ans = new RectifiedLinearComponent();
} else if (component_type == "NormalizeComponent") {
......@@ -1017,6 +1019,62 @@ void SoftmaxComponent::Backprop(const ChunkInfo &in_info,
}
}
void LogSoftmaxComponent::Propagate(const ChunkInfo &in_info,
const ChunkInfo &out_info,
const CuMatrixBase<BaseFloat> &in,
CuMatrixBase<BaseFloat> *out) const {
in_info.CheckSize(in);
out_info.CheckSize(*out);
KALDI_ASSERT(in_info.NumChunks() == out_info.NumChunks());
// Applies the log softmax function to each row of the input, writing the
// result to the output. For each row we compute x_i = x_i - log(sum_j exp(x_j)).
out->ApplyLogSoftMaxPerRow(in);
// Floors at log(1.0e-20) to match the 1.0e-20 floor applied in
// SoftmaxComponent::Propagate().
out->ApplyFloor(log(1.0e-20));
}
void LogSoftmaxComponent::Backprop(const ChunkInfo &in_info,
const ChunkInfo &out_info,
const CuMatrixBase<BaseFloat> &, //in_value,
const CuMatrixBase<BaseFloat> &out_value,
const CuMatrixBase<BaseFloat> &out_deriv,
Component *to_update,
CuMatrix<BaseFloat> *in_deriv) const {
/*
Let the output be y, then
y_i = x_i - log(sum_j exp(x_j))
where x_i is the input to the component. The Jacobian matrix of this
function is
J = I - 1 exp(y)^T
where 1 is a vector of ones. Let the derivative vector at the output be e,
and at the input be d, then we have
d = e - exp(y) Sum(e)
d_i = e_i - exp(y_i) Sum(e)
*/
in_deriv->Resize(out_deriv.NumRows(), out_deriv.NumCols());
KALDI_ASSERT(SameDim(out_value, out_deriv) && SameDim(out_value, *in_deriv));
const CuMatrixBase<BaseFloat> &Y(out_value), &E(out_deriv);
CuMatrixBase<BaseFloat> &D(*in_deriv);
D.CopyFromMat(Y);
D.ApplyExp(); // exp(y)
CuVector<BaseFloat> E_sum(D.NumRows()); // Initializes to zero
E_sum.AddColSumMat(1.0, E); // Sum(e)
D.MulRowsVec(E_sum); // exp(y) Sum(e)
D.Scale(-1.0); // - exp(y) Sum(e)
D.AddMat(1.0, E, kNoTrans); // e - exp(y) Sum(e)
// Updates stats.
if (to_update != NULL) {
NonlinearComponent *to_update_nonlinear =
dynamic_cast<NonlinearComponent*>(to_update);
to_update_nonlinear->UpdateStats(out_value);
}
}
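The formula d = e - exp(y) Sum(e) can be sanity-checked with finite differences: perturb one input coordinate, recompute the log-softmax, and compare the directional derivative against the analytic value. A self-contained C++ sketch (hypothetical helper, independent of Kaldi):

#include <algorithm>
#include <cmath>
#include <cstdio>
#include <vector>

// Numerically stable log-softmax of a vector.
static std::vector<double> LogSoftmax(const std::vector<double> &x) {
  double max = *std::max_element(x.begin(), x.end()), sum = 0.0;
  for (double v : x) sum += std::exp(v - max);
  std::vector<double> y(x.size());
  for (size_t i = 0; i < x.size(); i++) y[i] = x[i] - max - std::log(sum);
  return y;
}

int main() {
  std::vector<double> x = {0.3, -1.2, 2.0};  // input
  std::vector<double> e = {0.5, -0.7, 0.1};  // derivative at the output
  std::vector<double> y = LogSoftmax(x);
  double e_sum = e[0] + e[1] + e[2];         // Sum(e)
  const double eps = 1e-6;
  for (size_t i = 0; i < x.size(); i++) {
    double d_analytic = e[i] - std::exp(y[i]) * e_sum;  // d_i = e_i - exp(y_i) Sum(e)
    std::vector<double> xp = x, xm = x;
    xp[i] += eps; xm[i] -= eps;
    std::vector<double> yp = LogSoftmax(xp), ym = LogSoftmax(xm);
    double d_numeric = 0.0;  // central difference of sum_j e_j * y_j w.r.t. x_i
    for (size_t j = 0; j < x.size(); j++)
      d_numeric += e[j] * (yp[j] - ym[j]) / (2.0 * eps);
    std::printf("d[%zu]: analytic % .8f  numeric % .8f\n", i, d_analytic, d_numeric);
  }
  return 0;
}

The two columns should agree to roughly the square root of machine precision, the usual tolerance for a central-difference check.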
void AffineComponent::Scale(BaseFloat scale) {
linear_params_.Scale(scale);
bias_params_.Scale(scale);
......
......@@ -4,7 +4,7 @@
// 2012-2014 Johns Hopkins University (author: Daniel Povey)
// 2013 Xiaohui Zhang
// 2014 Vijayaditya Peddinti
// 2014 Guoguo Chen
// 2014-2015 Guoguo Chen
// See ../../COPYING for clarification regarding multiple authors
//
......@@ -386,6 +386,7 @@ class NonlinearComponent: public Component {
friend class SigmoidComponent;
friend class TanhComponent;
friend class SoftmaxComponent;
friend class LogSoftmaxComponent;
friend class RectifiedLinearComponent;
friend class SoftHingeComponent;
......@@ -742,6 +743,32 @@ class SoftmaxComponent: public NonlinearComponent {
SoftmaxComponent &operator = (const SoftmaxComponent &other); // Disallow.
};
class LogSoftmaxComponent: public NonlinearComponent {
public:
explicit LogSoftmaxComponent(int32 dim): NonlinearComponent(dim) { }
explicit LogSoftmaxComponent(const LogSoftmaxComponent &other): NonlinearComponent(other) { }
LogSoftmaxComponent() { }
virtual std::string Type() const { return "LogSoftmaxComponent"; }
virtual bool BackpropNeedsInput() const { return false; }
virtual bool BackpropNeedsOutput() const { return true; }
using Component::Propagate; // to avoid name hiding
virtual void Propagate(const ChunkInfo &in_info,
const ChunkInfo &out_info,
const CuMatrixBase<BaseFloat> &in,
CuMatrixBase<BaseFloat> *out) const;
virtual void Backprop(const ChunkInfo &in_info,
const ChunkInfo &out_info,
const CuMatrixBase<BaseFloat> &in_value,
const CuMatrixBase<BaseFloat> &out_value,
const CuMatrixBase<BaseFloat> &out_deriv,
Component *to_update, // may be identical to "this".
CuMatrix<BaseFloat> *in_deriv) const;
virtual Component* Copy() const { return new LogSoftmaxComponent(*this); }
private:
LogSoftmaxComponent &operator = (const LogSoftmaxComponent &other); // Disallow.
};
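Since LogSoftmaxComponent declares no InitFromString of its own, it presumably inherits NonlinearComponent's, which parses a dim parameter just as SoftmaxComponent's does; a hypothetical nnet2 config line (the dim value is illustrative only) would then be:

LogSoftmaxComponent dim=3000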
class FixedAffineComponent;
......