Commit 5526c218 authored by Daniel Povey

Merge pull request #47 from naxingyu/convolution-nnet2

add Convolution component in nnet2
parents c105c63b 372a1505
......@@ -7,3 +7,6 @@ exp/tri5a/decode/cer_13:%WER 49.67 [ 27891 / 56154, 2877 ins, 4538 del, 20476 su
exp/tri5a_mce/decode/cer_11:%WER 44.74 [ 25125 / 56154, 2112 ins, 4108 del, 18905 sub ]
exp/tri5a_mmi_b0.1/decode/cer_11:%WER 44.24 [ 24840 / 56154, 2060 ins, 4118 del, 18662 sub ]
exp/tri5a_mpe/decode/cer_12:%WER 44.96 [ 25247 / 56154, 2233 ins, 4174 del, 18840 sub ]
# ConvNet with 2 convolutional layers and 2 ReLU layers
exp/nnet2_convnet/decode/cer_10:%WER 40.73 [ 22873 / 56154, 2609 ins, 3712 del, 16552 sub ]
#!/bin/bash
# Copyright 2015 Xingyu Na
# This script runs on the full training set, using a ConvNet setup on top of
# fbank features, on GPU. The ConvNet has four hidden layers: two convolutional
# layers and two affine-transform layers with ReLU nonlinearity.
# Convolutional layer [1]:
#   convolution1d, input feature dim is 36, filter dim is 7, output dim is
#   30, 128 filters are used
#   3-to-1 maxpooling, input dim is 30, output dim is 10
# Convolutional layer [2]:
#   convolution1d, input feature dim is 10, filter dim is 4, output dim is
#   7, 256 filters are used
# Affine-transform layers [3-4]:
#   affine transform with ReLU nonlinearity.
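# Dimension bookkeeping behind the comments above (a sketch):
#   layer 1: num-patches = 1 + (36 - 7) / 1 = 30 outputs per filter;
#            128 filters give 128 x 30 = 3840 activations per frame, and
#            3-to-1 maxpooling reduces each map from 30 to 10 (128 x 10 = 1280).
#   layer 2: num-patches = 1 + (10 - 4) / 1 = 7 outputs per filter;
#            256 filters give 256 x 7 = 1792 activations per frame.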
temp_dir=
dir=exp/nnet2_convnet
stage=-5
train_original=data/train
train=data-fb/train
. ./cmd.sh
. ./path.sh
. utils/parse_options.sh
parallel_opts="--gpu 1" # This is suitable for the CLSP network; you'll
# likely have to change it.
# Make the FBANK features
if [ $stage -le -5 ]; then
# Dev set
utils/copy_data_dir.sh data/dev data-fb/dev || exit 1; rm data-fb/dev/{cmvn,feats}.scp
steps/make_fbank.sh --nj 10 --cmd "$train_cmd" \
data-fb/dev data-fb/dev/log data-fb/dev/data || exit 1;
steps/compute_cmvn_stats.sh data-fb/dev data-fb/dev/log data-fb/dev/data || exit 1;
# Training set
utils/copy_data_dir.sh $train_original $train || exit 1; rm $train/{cmvn,feats}.scp
steps/make_fbank.sh --nj 10 --cmd "$train_cmd" \
$train $train/log $train/data || exit 1;
steps/compute_cmvn_stats.sh $train $train/log $train/data || exit 1;
fi
(
if [ ! -f $dir/final.mdl ]; then
steps/nnet2/train_convnet_accel2.sh --parallel-opts "$parallel_opts" \
--cmd "$decode_cmd" --stage $stage \
--num-threads 1 --minibatch-size 512 \
--mix-up 20000 --samples-per-iter 300000 \
--num-epochs 15 --delta-order 2 \
--initial-effective-lrate 0.0005 --final-effective-lrate 0.000025 \
--num-jobs-initial 3 --num-jobs-final 8 --splice-width 5 \
--hidden-dim 2000 --num-filters1 128 --patch-dim1 7 --pool-size 3 \
--num-filters2 256 --patch-dim2 4 \
$train data/lang exp/tri5a_ali $dir || exit 1;
fi
steps/nnet2/decode.sh --cmd "$decode_cmd" --nj 10 \
--config conf/decode.config \
exp/tri5a/graph data-fb/dev \
$dir/decode || exit 1;
)
......@@ -84,7 +84,12 @@ fi
splice_opts=`cat $srcdir/splice_opts 2>/dev/null`
case $feat_type in
raw) feats="ark,s,cs:apply-cmvn $cmvn_opts --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- |"
if [ -f $srcdir/delta_order ]; then
delta_order=`cat $srcdir/delta_order 2>/dev/null`
feats="$feats add-deltas --delta-order=$delta_order ark:- ark:- |"
fi
;;
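# (Sketch of the intent: the ConvNet training script is assumed to have
# written its --delta-order value to $srcdir/delta_order, so decoding
# rebuilds the same fbank+delta features the model was trained on.)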
lda) feats="ark,s,cs:apply-cmvn $cmvn_opts --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $srcdir/final.mat ark:- ark:- |"
;;
*) echo "$0: invalid feature type $feat_type" && exit 1;
......
......@@ -62,6 +62,7 @@ void cudaF_apply_pow_abs(dim3 Gr, dim3 Bl, float* mat, float power, bool include
void cudaF_apply_heaviside(dim3 Gr, dim3 Bl, float* mat, MatrixDim d);
void cudaF_apply_floor(dim3 Gr, dim3 Bl, float* mat, float floor_val, MatrixDim d);
void cudaF_copy_cols(dim3 Gr, dim3 Bl, float* dst, const float* src, const MatrixIndexT_cuda* reorder, MatrixDim dst_dim, int src_stride);
void cudaF_add_cols(dim3 Gr, dim3 Bl, float* dst, const float* src, const MatrixIndexT_cuda* reorder, MatrixDim dst_dim, int src_stride);
void cudaF_copy_rows(dim3 Gr, dim3 Bl, float* dst, const float* src, const MatrixIndexT_cuda* reorder, MatrixDim dst_dim, int src_stride);
void cudaF_apply_ceiling(dim3 Gr, dim3 Bl, float* mat, float ceiling_val, MatrixDim d);
void cudaF_set_diag(int Gr, int Bl, float* mat, float value, MatrixDim d);
......@@ -190,6 +191,7 @@ void cudaD_apply_pow_abs(dim3 Gr, dim3 Bl, double* mat, double power, bool inclu
void cudaD_apply_heaviside(dim3 Gr, dim3 Bl, double* mat, MatrixDim d);
void cudaD_apply_floor(dim3 Gr, dim3 Bl, double* mat, double floor_val, MatrixDim d);
void cudaD_copy_cols(dim3 Gr, dim3 Bl, double* dst, const double* src, const MatrixIndexT_cuda* reorder, MatrixDim dst_dim, int src_stride);
void cudaD_add_cols(dim3 Gr, dim3 Bl, double* dst, const double* src, const MatrixIndexT_cuda* reorder, MatrixDim dst_dim, int src_stride);
void cudaD_copy_rows(dim3 Gr, dim3 Bl, double* dst, const double* src, const MatrixIndexT_cuda* reorder, MatrixDim dst_dim, int src_stride);
void cudaD_apply_ceiling(dim3 Gr, dim3 Bl, double* mat, double ceiling_val, MatrixDim d);
void cudaD_set_diag(int Gr, int Bl, double* mat, double value, MatrixDim d);
......
......@@ -1259,6 +1259,25 @@ static void _copy_cols(Real* dst, const Real *src, const MatrixIndexT_cuda* reor
}
}
template<typename Real>
__global__
static void _add_cols(Real* dst, const Real *src, const MatrixIndexT_cuda* reorder, MatrixDim dst_dim, int src_stride) {
// Note: in this kernel, the x dimension corresponds to rows and the y to columns,
// as it will be going forward.
int i = blockIdx.x * blockDim.x + threadIdx.x;
int j = blockIdx.y * blockDim.y + threadIdx.y;
if (i < dst_dim.rows && j < dst_dim.cols) {
int index = reorder[j],
dst_index = i * dst_dim.stride + j;
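// A negative entry in reorder means "no source column for this
// destination column": dst(i, j) is left unchanged.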
if (index >= 0) {
int src_index = i * src_stride + reorder[j];
Real val = src[src_index];
dst[dst_index] += val;
}
}
}
template<typename Real>
__global__
static void _copy_rows(Real* dst, const Real *src, const MatrixIndexT_cuda* reorder, MatrixDim dst_dim, int src_stride) {
......@@ -2024,6 +2043,10 @@ void cudaF_copy_cols(dim3 Gr, dim3 Bl, float* dst, const float* src, const Matri
_copy_cols<<<Gr,Bl>>>(dst, src, reorder, dst_dim, src_stride);
}
void cudaF_add_cols(dim3 Gr, dim3 Bl, float* dst, const float* src, const MatrixIndexT_cuda* reorder, MatrixDim dst_dim, int src_stride) {
_add_cols<<<Gr,Bl>>>(dst, src, reorder, dst_dim, src_stride);
}
void cudaF_copy_rows(dim3 Gr, dim3 Bl, float* dst, const float* src, const MatrixIndexT_cuda* reorder, MatrixDim dst_dim, int src_stride) {
_copy_rows<<<Gr,Bl>>>(dst, src, reorder, dst_dim, src_stride);
}
......@@ -2445,6 +2468,10 @@ void cudaD_copy_cols(dim3 Gr, dim3 Bl, double* dst, const double* src, const Mat
_copy_cols<<<Gr,Bl>>>(dst, src, reorder, dst_dim, src_stride);
}
void cudaD_add_cols(dim3 Gr, dim3 Bl, double* dst, const double* src, const MatrixIndexT_cuda* reorder, MatrixDim dst_dim, int src_stride) {
_add_cols<<<Gr,Bl>>>(dst, src, reorder, dst_dim, src_stride);
}
void cudaD_copy_rows(dim3 Gr, dim3 Bl, double* dst, const double* src, const MatrixIndexT_cuda* reorder, MatrixDim dst_dim, int src_stride) {
_copy_rows<<<Gr,Bl>>>(dst, src, reorder, dst_dim, src_stride);
}
......
......@@ -92,6 +92,9 @@ inline void cuda_apply_ceiling(dim3 Gr, dim3 Bl, float* mat, float ceiling_val,
inline void cuda_copy_cols(dim3 Gr, dim3 Bl, float* dst, const float* src, const MatrixIndexT_cuda* reorder, MatrixDim dst_dim, int src_stride) {
cudaF_copy_cols(Gr, Bl, dst, src, reorder, dst_dim, src_stride);
}
inline void cuda_add_cols(dim3 Gr, dim3 Bl, float* dst, const float* src, const MatrixIndexT_cuda* reorder, MatrixDim dst_dim, int src_stride) {
cudaF_add_cols(Gr, Bl, dst, src, reorder, dst_dim, src_stride);
}
inline void cuda_copy_rows(dim3 Gr, dim3 Bl, float* dst, const float* src, const MatrixIndexT_cuda* reorder, MatrixDim dst_dim, int src_stride) {
cudaF_copy_rows(Gr, Bl, dst, src, reorder, dst_dim, src_stride);
}
......@@ -259,6 +262,9 @@ inline void cuda_apply_ceiling(dim3 Gr, dim3 Bl, double* mat, double ceiling_val
inline void cuda_copy_cols(dim3 Gr, dim3 Bl, double* dst, const double* src, const MatrixIndexT_cuda* reorder, MatrixDim dst_dim, int src_stride) {
cudaD_copy_cols(Gr, Bl, dst, src, reorder, dst_dim, src_stride);
}
inline void cuda_add_cols(dim3 Gr, dim3 Bl, double* dst, const double* src, const MatrixIndexT_cuda* reorder, MatrixDim dst_dim, int src_stride) {
cudaD_add_cols(Gr, Bl, dst, src, reorder, dst_dim, src_stride);
}
inline void cuda_copy_rows(dim3 Gr, dim3 Bl, double* dst, const double* src, const MatrixIndexT_cuda* reorder, MatrixDim dst_dim, int src_stride) {
cudaD_copy_rows(Gr, Bl, dst, src, reorder, dst_dim, src_stride);
}
......
......@@ -509,6 +509,36 @@ static void UnitTestCuMatrixCopyCols() {
}
template<typename Real>
static void UnitTestCuMatrixAddCols() {
for (MatrixIndexT p = 0; p < 2; p++) {
MatrixIndexT num_cols1 = 10 + Rand() % 10,
num_cols2 = 10 + Rand() % 10,
num_rows = 10 + Rand() % 10;
CuMatrix<Real> M(num_rows, num_cols1);
M.SetRandn();
CuMatrix<Real> N(num_rows, num_cols2), O(num_rows, num_cols2);
std::vector<int32> reorder(num_cols2);
for (int32 i = 0; i < num_cols2; i++)
reorder[i] = -1 + (Rand() % (num_cols1 + 1));
if (Rand() % 2 == 0) {
N.AddCols(M, reorder);
} else {
CuArray<int32> cuda_reorder(reorder);
N.AddCols(M, cuda_reorder);
}
for (int32 i = 0; i < num_rows; i++)
for (int32 j = 0; j < num_cols2; j++)
if (reorder[j] < 0) O(i, j) = 0;
else O(i, j) = M(i, reorder[j]);
AssertEqual(N, O);
}
}
template<typename Real>
static void UnitTestCuMatrixApplyFloor() {
......@@ -2093,6 +2123,7 @@ template<typename Real> void CudaMatrixUnitTest() {
UnitTestCuMatrixCopyFromTp<Real>();
UnitTestCuMatrixAddMatTp<Real>();
UnitTestCuMatrixCopyCols<Real>();
UnitTestCuMatrixAddCols<Real>();
UnitTestCuMatrixSumColumnRanges<Real>();
UnitTestCuMatrixCopyRows<Real>();
UnitTestCuMatrixCopyRowsFromVec<Real>();
......
......@@ -1960,6 +1960,56 @@ void CuMatrixBase<Real>::CopyCols(const CuMatrixBase<Real> &src,
}
}
template<typename Real>
void CuMatrixBase<Real>::AddCols(const CuMatrixBase<Real> &src,
const std::vector<MatrixIndexT> &reorder) {
#if HAVE_CUDA == 1
if (CuDevice::Instantiate().Enabled()) {
KALDI_ASSERT(static_cast<MatrixIndexT>(reorder.size()) == NumCols());
KALDI_ASSERT(NumRows() == src.NumRows());
#ifdef KALDI_PARANOID
MatrixIndexT src_cols = src.NumCols();
for (size_t i = 0; i < reorder.size(); i++)
KALDI_ASSERT(reorder[i] >= -1 && reorder[i] < src_cols);
#endif
CuArray<MatrixIndexT> cuda_reorder(reorder);
Timer tim;
dim3 dimBlock(CU2DBLOCK, CU2DBLOCK);
// This kernel, being newer, has the (x,y) dims as (rows, cols).
dim3 dimGrid(n_blocks(NumRows(), CU2DBLOCK), n_blocks(NumCols(), CU2DBLOCK));
cuda_add_cols(dimGrid, dimBlock, data_, src.Data(), cuda_reorder.Data(), Dim(), src.Stride());
CU_SAFE_CALL(cudaGetLastError());
CuDevice::Instantiate().AccuProfile(__func__, tim.Elapsed());
} else
#endif
{
Mat().AddCols(src.Mat(), reorder);
}
}
template<typename Real>
void CuMatrixBase<Real>::AddCols(const CuMatrixBase<Real> &src,
const CuArray<MatrixIndexT> &reorder) {
#if HAVE_CUDA == 1
if (CuDevice::Instantiate().Enabled()) {
KALDI_ASSERT(reorder.Dim() == NumCols());
KALDI_ASSERT(NumRows() == src.NumRows());
Timer tim;
dim3 dimBlock(CU2DBLOCK, CU2DBLOCK);
// This kernel, being newer, has the (x,y) dims as (rows, cols).
dim3 dimGrid(n_blocks(NumRows(), CU2DBLOCK), n_blocks(NumCols(), CU2DBLOCK));
cuda_add_cols(dimGrid, dimBlock, data_, src.Data(), reorder.Data(), Dim(), src.Stride());
CU_SAFE_CALL(cudaGetLastError());
CuDevice::Instantiate().AccuProfile(__func__, tim.Elapsed());
} else
#endif
{
std::vector<MatrixIndexT> reorder_cpu;
reorder.CopyToVec(&reorder_cpu);
Mat().AddCols(src.Mat(), reorder_cpu);
}
}
template<typename Real>
void CuMatrixBase<Real>::CopyRows(const CuMatrixBase<Real> &src,
......
......@@ -98,6 +98,18 @@ class CuMatrixBase {
void CopyCols(const CuMatrixBase<Real> &src,
const CuArray<MatrixIndexT> &indices);
/// Adds column indices[c] of src to column c.
/// As a special case, if indices[c] == -1, column c is left unchanged.
/// indices.size() must equal this->NumCols(),
/// all elements of indices must be in [-1, src.NumCols()-1],
/// and src.NumRows() must equal this->NumRows().
void AddCols(const CuMatrixBase<Real> &src,
const std::vector<MatrixIndexT> &indices);
/// Version of AddCols that takes a CuArray argument.
void AddCols(const CuMatrixBase<Real> &src,
const CuArray<MatrixIndexT> &indices);
/// Copies row r from row indices[r] of src.
/// As a special case, if indices[r] == -1, sets row r to zero.
......
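A minimal usage sketch of the AddCols overloads declared above (hypothetical dimensions; per the doc comment, -1 entries leave the corresponding destination column untouched):

#include <vector>
#include "cudamatrix/cu-matrix.h"

void AddColsSketch() {
  using namespace kaldi;
  CuMatrix<BaseFloat> src(4, 3), dst(4, 2);
  src.SetRandn();
  dst.SetZero();
  std::vector<MatrixIndexT> indices(2);
  indices[0] = 2;   // dst column 0 += src column 2
  indices[1] = -1;  // dst column 1 is left unchanged
  dst.AddCols(src, indices);
}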
......@@ -2566,6 +2566,34 @@ void MatrixBase<Real>::CopyCols(const MatrixBase<Real> &src,
}
}
template<typename Real>
void MatrixBase<Real>::AddCols(const MatrixBase<Real> &src,
const std::vector<MatrixIndexT> &indices) {
KALDI_ASSERT(NumRows() == src.NumRows());
KALDI_ASSERT(NumCols() == static_cast<MatrixIndexT>(indices.size()));
MatrixIndexT num_rows = num_rows_, num_cols = num_cols_,
this_stride = stride_, src_stride = src.stride_;
Real *this_data = this->data_;
const Real *src_data = src.data_;
#ifdef KALDI_PARANOID
MatrixIndexT src_cols = src.NumCols();
for (std::vector<MatrixIndexT>::const_iterator iter = indices.begin();
iter != indices.end(); ++iter)
KALDI_ASSERT(*iter >= -1 && *iter < src_cols);
#endif
// For the sake of memory locality we do this row by row, rather
// than doing it column-wise using cblas_Xcopy.
for (MatrixIndexT r = 0; r < num_rows; r++, this_data += this_stride, src_data += src_stride) {
const MatrixIndexT *index_ptr = &(indices[0]);
for (MatrixIndexT c = 0; c < num_cols; c++, index_ptr++) {
if (*index_ptr >= 0)
this_data[c] += src_data[*index_ptr];
}
}
}
template<typename Real>
void MatrixBase<Real>::CopyRows(const MatrixBase<Real> &src,
const std::vector<MatrixIndexT> &indices) {
......
......@@ -284,6 +284,14 @@ class MatrixBase {
void CopyRows(const MatrixBase<Real> &src,
const std::vector<MatrixIndexT> &indices);
/// Adds column indices[c] of src to column c.
/// As a special case, if indices[c] == -1, column c is left unchanged.
/// indices.size() must equal this->NumCols(),
/// all elements of indices must be in [-1, src.NumCols()-1],
/// and src.NumRows() must equal this->NumRows().
void AddCols(const MatrixBase<Real> &src,
const std::vector<MatrixIndexT> &indices);
/// Applies floor to all matrix elements
void ApplyFloor(Real floor_val);
......
......@@ -307,6 +307,31 @@ void UnitTestPnormComponent() {
}
}
void UnitTestMaxpoolingComponent() {
// We're testing that the gradients are computed correctly:
// the input gradients (MaxpoolingComponent has no model parameters).
for (int32 i = 0; i < 5; i++) {
int32 pool_stride = 5 + Rand() % 10,
pool_size = 2 + Rand() % 3,
num_pools = 1 + Rand() % 10;
int32 output_dim = num_pools * pool_stride;
int32 num_patches = num_pools * pool_size;
int32 input_dim = pool_stride * num_patches;
MaxpoolingComponent component(input_dim, output_dim,
pool_size, pool_stride);
UnitTestGenericComponentInternal(component);
}
{
MaxpoolingComponent component;
component.InitFromString("input-dim=192 output-dim=64 pool-size=3 pool-stride=16");
UnitTestGenericComponentInternal(component);
}
}
void UnitTestAffineComponent() {
......@@ -337,6 +362,44 @@ void UnitTestAffineComponent() {
}
}
void UnitTestConvolutional1dComponent() {
BaseFloat learning_rate = 0.01,
param_stddev = 0.1, bias_stddev = 1.0;
int32 patch_stride = 10, patch_step = 1, patch_dim = 4;
int32 num_patches = 1 + (patch_stride - patch_dim) / patch_step;
int32 num_splice = 5 + Rand() % 10, num_filters = 5 + Rand() % 10;
int32 input_dim = patch_stride * num_splice;
int32 filter_dim = patch_dim * num_splice;
int32 output_dim = num_patches * num_filters;
{
Convolutional1dComponent component;
if (Rand() % 2 == 0) {
component.Init(learning_rate, input_dim, output_dim,
patch_dim, patch_step, patch_stride,
param_stddev, bias_stddev);
} else {
// initialize the hyper-parameters (patch geometry) first; the second
// Init below only loads the filter and bias parameters from a file
component.Init(learning_rate, input_dim, output_dim,
patch_dim, patch_step, patch_stride,
param_stddev, bias_stddev);
Matrix<BaseFloat> mat(num_filters, filter_dim + 1);
mat.SetRandn();
mat.Scale(param_stddev);
WriteKaldiObject(mat, "tmpf", true);
Sleep(0.5);
component.Init(learning_rate, "tmpf");
unlink("tmpf");
}
UnitTestGenericComponentInternal(component);
}
{
const char *str = "learning-rate=0.01 input-dim=100 output-dim=70 param-stddev=0.1 patch-dim=4 patch-step=1 patch-stride=10";
Convolutional1dComponent component;
component.InitFromString(str);
UnitTestGenericComponentInternal(component);
}
}
void UnitTestDropoutComponent() {
// We're testing that the gradients are computed correctly:
// the input gradients and the model gradients.
......@@ -812,6 +875,7 @@ int main() {
UnitTestSpliceComponent();
UnitTestMaxoutComponent();
UnitTestPnormComponent();
UnitTestMaxpoolingComponent();
UnitTestGenericComponent<NormalizeComponent>();
UnitTestSigmoidComponent();
UnitTestAffineComponent();
......@@ -826,6 +890,7 @@ int main() {
UnitTestFixedBiasComponent();
UnitTestAffineComponentPreconditioned();
UnitTestAffineComponentPreconditionedOnline();
UnitTestConvolutional1dComponent();
UnitTestDropoutComponent();
UnitTestAdditiveNoiseComponent();
UnitTestParsing();
......
......@@ -448,6 +448,69 @@ class MaxoutComponent: public Component {
int32 output_dim_;
};
/**
* MaxpoolingComponent :
* The maxpooling component was first used in ConvNets for selecting a
* representative activation in an area. It inspired the Maxout nonlinearity.
*
* The input/output matrices are split into submatrices of width 'pool_stride_'.
* For instance, a minibatch of 512 frames is propagated by a convolutional
* layer, resulting in a 512 x 3840 input matrix for the MaxpoolingComponent,
* which is composed of 128 feature maps for each frame (128 x 30). If you want
* 3-to-1 maxpooling on each feature map, set 'pool_stride_' and 'pool_size_'
* to 128 and 3 respectively. The maxpooling component then creates an output
* matrix of 512 x 1280. The 30 input neurons are grouped with a group size of
* 3, and the maximum of each group is selected, creating a smaller feature map
* of size 10.
*
* Our pooling does not support overlaps, which simplifies the
* implementation (and was not helpful for Ossama).
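*
* In terms of column indices (a sketch of the layout described above, with
* num_pools = output_dim_ / pool_stride_): output column
* q * pool_stride_ + f (pool q, feature map f) is the maximum over
* r = 0 .. pool_size_ - 1 of input column (q * pool_size_ + r) * pool_stride_ + f.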
*/
class MaxpoolingComponent: public Component {
public:
void Init(int32 input_dim, int32 output_dim,
int32 pool_size, int32 pool_stride);
explicit MaxpoolingComponent(int32 input_dim, int32 output_dim,
int32 pool_size, int32 pool_stride) {
Init(input_dim, output_dim, pool_size, pool_stride);
}
MaxpoolingComponent(): input_dim_(0), output_dim_(0),
pool_size_(0), pool_stride_(0) { }
virtual std::string Type() const { return "MaxpoolingComponent"; }
virtual void InitFromString(std::string args);
virtual int32 InputDim() const { return input_dim_; }
virtual int32 OutputDim() const { return output_dim_; }
using Component::Propagate; // to avoid name hiding
virtual void Propagate(const ChunkInfo &in_info,
const ChunkInfo &out_info,
const CuMatrixBase<BaseFloat> &in,
CuMatrixBase<BaseFloat> *out) const;
virtual void Backprop(const ChunkInfo &in_info,
const ChunkInfo &out_info,
const CuMatrixBase<BaseFloat> &in_value,
const CuMatrixBase<BaseFloat> &, //out_value,
const CuMatrixBase<BaseFloat> &out_deriv,
Component *to_update, // may be identical to "this".
CuMatrix<BaseFloat> *in_deriv) const;
virtual bool BackpropNeedsInput() const { return true; }
virtual bool BackpropNeedsOutput() const { return true; }
virtual Component* Copy() const {
return new MaxpoolingComponent(input_dim_, output_dim_,
pool_size_, pool_stride_); }
virtual void Read(std::istream &is, bool binary); // This Read function
// requires that the Component has the correct type.
/// Write component to stream
virtual void Write(std::ostream &os, bool binary) const;
virtual std::string Info() const;
protected:
int32 input_dim_;
int32 output_dim_;
int32 pool_size_;
int32 pool_stride_;
};
class PnormComponent: public Component {
public:
void Init(int32 input_dim, int32 output_dim, BaseFloat p);
......@@ -1613,6 +1676,122 @@ class AdditiveNoiseComponent: public RandomComponent {
BaseFloat stddev_;
};
/**
* Convolutional1dComponent implements convolution over the frequency axis.
* We assume the input features are spliced, i.e. each frame is in
* fact a set of stacked frames, so that we can form patches which span
* several frequency bands and the whole time axis. A patch is the
* instance of a filter on a group of frequency bands and the whole time
* axis. Shifts of the filter generate the patches.
*
* The convolution is done over the whole axis with the same filter
* coefficients, i.e. we don't use separate filters for different
* 'regions' of the frequency axis. Because of the convolution, the same
* weights are used repeatedly; the final gradient is a sum of all
* position-specific gradients (the sum was found to work better than
* averaging).
*
* In order to have a fast implementation, the filters are
* represented in vectorized form: each rectangular filter
* corresponds to a row in a matrix in which all the filters are
* stored. The features are then re-shaped to a set of matrices, where
* one matrix corresponds to a single patch position at which all the
* filters get applied.
*
* The type of convolution is controlled by the hyperparameters:
* patch_dim_ ... frequency axis size of the patch
* patch_step_ ... size of shift in the convolution
* patch_stride_ ... shift for 2nd dim of a patch
* (i.e. frame length before splicing)
* For instance, for a convolutional component after raw input,
* if the input is a 36-dim fbank feature with deltas of order 2
* and spliced using +/- 5 frames of context, the convolutional
* component takes the input as a 36 x 33 image. The patch_stride_
* should be configured as 36. If patch_step_ and patch_dim_ are
* configured as 1 and 7, the Convolutional1dComponent creates a
* 2D filter of 7 x 33, such that the convolution is actually done
* only along the frequency axis. Specifically, the convolutional
* output along the frequency axis is (36 - 7) / 1 + 1 = 30, and
* the convolutional output along the temporal axis is 33 - 33 + 1 = 1,
* resulting in an output image of 30 x 1, which is called a feature map
* in ConvNet terms. Then, if the output-dim is set to 3840, the
* constructor knows there should be 3840 / 30 = 128 distinct filters,
* which will create 128 feature maps of 30 x 1 for one frame of
* input. The feature maps are vectorized as a 3840-dim row vector
* in the output matrix of this component. For details on propagation
* through the Convolutional1dComponent, check the function definition.
*
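* The example above in formula form (a restatement, not new behavior):
*   num_splice  = input_dim / patch_stride_                      = 1188 / 36 = 33
*   num_patches = 1 + (patch_stride_ - patch_dim_) / patch_step_ = 1 + (36 - 7) / 1 = 30
*   num_filters = output_dim / num_patches                       = 3840 / 30 = 128
*   filter_dim  = patch_dim_ * num_splice                        = 7 * 33 = 231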
*/
class Convolutional1dComponent: public UpdatableComponent {
public:
Convolutional1dComponent();
// constructor using another component
Convolutional1dComponent(const Convolutional1dComponent &component);
// constructor using parameters
Convolutional1dComponent(const CuMatrixBase<BaseFloat> &filter_params,
const CuVectorBase<BaseFloat> &bias_params,
BaseFloat learning_rate);
int32 InputDim() const;
int32 OutputDim() const;
void Init(BaseFloat learning_rate, int32 input_dim, int32 output_dim,
int32 patch_dim, int32 patch_step, int32 patch_stride,
BaseFloat param_stddev, BaseFloat bias_stddev);
void Init(BaseFloat learning_rate, std::string matrix_filename);
// resize the component, setting the parameters to zero, while
// leaving any other configuration values the same
void Resize(int32 input_dim, int32 output_dim);
std::string Info() const;
void InitFromString(std::string args);
std::string Type() const { return "Convolutional1dComponent"; }
bool BackpropNeedsInput() const { return true; }
bool BackpropNeedsOutput() const { return false; }
using Component::Propagate; // to avoid name hiding
void Propagate(const ChunkInfo &in_info,
const ChunkInfo &out_info,
const CuMatrixBase<BaseFloat> &in,
CuMatrixBase<BaseFloat> *out) const;
void Scale(BaseFloat scale);
virtual void Add(BaseFloat alpha, const UpdatableComponent &other);
virtual void Backprop(const ChunkInfo &in_info,
const ChunkInfo &out_info,
const CuMatrixBase<BaseFloat> &in_value,
const CuMatrixBase<BaseFloat> &out_value,
const CuMatrixBase<BaseFloat> &out_deriv,
Component *to_update_in,
CuMatrix<BaseFloat> *in_deriv) const;
void SetZero(bool treat_as_gradient);
void Read(std::istream &is, bool binary);
void Write(std::ostream &os, bool binary) const;
virtual BaseFloat DotProduct(const UpdatableComponent &other) const;
Component* Copy() const;
void PerturbParams(BaseFloat stddev);
void SetParams(const VectorBase<BaseFloat> &bias,
const MatrixBase<BaseFloat> &filter);
const CuVector<BaseFloat> &BiasParams() { return bias_params_; }
const CuMatrix<BaseFloat> &LinearParams() { return filter_params_; }
int32 GetParameterDim() const;
void Update(const CuMatrixBase<BaseFloat> &in_value,
const CuMatrixBase<BaseFloat> &out_deriv);
private:
int32 patch_dim_;
int32 patch_step_;
int32 patch_stride_;
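// Backprop helpers (a sketch of their roles): ReverseIndexes inverts the
// forward patch-to-input-column map, giving for each input column the patch
// positions that read it; RearrangeIndexes pads those ragged lists with -1
// so they can be fed to CuMatrix::AddCols (which skips -1 entries).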
static void ReverseIndexes(const std::vector<int32> &forward_indexes,
int32 input_dim,
std::vector<std::vector<int32> > *backward_indexes);
static void RearrangeIndexes(const std::vector<std::vector<int32> > &in,
std::vector<std::vector<int32> > *out);
const Convolutional1dComponent &operator = (const Convolutional1dComponent &other); // Disallow.
CuMatrix<BaseFloat> filter_params_;
CuVector<BaseFloat> bias_params_;
bool is_gradient_;
};
/// Functions used in Init routines. Suppose name=="foo", if "string" has a
/// field like foo=12, this function will set "param" to 12 and remove that
......