Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
K
kaldi_2015
Project overview
Project overview
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Yoann HOUPERT
kaldi_2015
Commits
5526c218
Commit
5526c218
authored
Aug 16, 2015
by
Daniel Povey
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #47 from naxingyu/convolution-nnet2
add Convolution component in nnet2
parents
c105c63b
372a1505
Changes
15
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
15 changed files
with
1794 additions
and
1 deletion
+1794
-1
egs/hkust/s5/RESULTS
egs/hkust/s5/RESULTS
+3
-0
egs/hkust/s5/local/nnet2/run_convnet.sh
egs/hkust/s5/local/nnet2/run_convnet.sh
+63
-0
egs/wsj/s5/steps/nnet2/decode.sh
egs/wsj/s5/steps/nnet2/decode.sh
+6
-1
egs/wsj/s5/steps/nnet2/train_convnet_accel2.sh
egs/wsj/s5/steps/nnet2/train_convnet_accel2.sh
+674
-0
src/cudamatrix/cu-kernels-ansi.h
src/cudamatrix/cu-kernels-ansi.h
+2
-0
src/cudamatrix/cu-kernels.cu
src/cudamatrix/cu-kernels.cu
+27
-0
src/cudamatrix/cu-kernels.h
src/cudamatrix/cu-kernels.h
+6
-0
src/cudamatrix/cu-matrix-test.cc
src/cudamatrix/cu-matrix-test.cc
+31
-0
src/cudamatrix/cu-matrix.cc
src/cudamatrix/cu-matrix.cc
+50
-0
src/cudamatrix/cu-matrix.h
src/cudamatrix/cu-matrix.h
+12
-0
src/matrix/kaldi-matrix.cc
src/matrix/kaldi-matrix.cc
+28
-0
src/matrix/kaldi-matrix.h
src/matrix/kaldi-matrix.h
+8
-0
src/nnet2/nnet-component-test.cc
src/nnet2/nnet-component-test.cc
+65
-0
src/nnet2/nnet-component.cc
src/nnet2/nnet-component.cc
+640
-0
src/nnet2/nnet-component.h
src/nnet2/nnet-component.h
+179
-0
No files found.
egs/hkust/s5/RESULTS
View file @
5526c218
...
...
@@ -7,3 +7,6 @@ exp/tri5a/decode/cer_13:%WER 49.67 [ 27891 / 56154, 2877 ins, 4538 del, 20476 su
exp/tri5a_mce/decode/cer_11:%WER 44.74 [ 25125 / 56154, 2112 ins, 4108 del, 18905 sub ]
exp/tri5a_mmi_b0.1/decode/cer_11:%WER 44.24 [ 24840 / 56154, 2060 ins, 4118 del, 18662 sub ]
exp/tri5a_mpe/decode/cer_12:%WER 44.96 [ 25247 / 56154, 2233 ins, 4174 del, 18840 sub ]
# ConvNet with 2 convolutional layers and 2 ReLU layers
exp/nnet2_convnet/decode/cer_10:%WER 40.73 [ 22873 / 56154, 2609 ins, 3712 del, 16552 sub ]
egs/hkust/s5/local/nnet2/run_convnet.sh
0 → 100755
View file @
5526c218
#!/bin/bash

# 2015 Xingyu Na
# This script runs on the full training set, using ConvNet setup on top of
# fbank features, on GPU. The ConvNet has four hidden layers, two convolutional
# layers and two affine transform layers with ReLU nonlinearity.
# Convolutional layer [1]:
#   convolution1d, input feature dim is 36, filter dim is 7, output dim is
#   30, 128 filters are used
#   maxpooling, 3-to-1 maxpooling, input dim is 30, output dim is 10
# Convolutional layer [2]:
#   convolution1d, input feature dim is 10, filter dim is 4, output dim is
#   7, 256 filters are used
# Affine transform layers [3-4]:
#   affine transform with ReLU nonlinearity.

temp_dir=
dir=exp/nnet2_convnet
stage=-5
train_original=data/train
train=data-fb/train

. ./cmd.sh
. ./path.sh

. utils/parse_options.sh

parallel_opts="--gpu 1"  # This is suitable for the CLSP network, you'll
                         # likely have to change it.

# Make the FBANK features
if [ $stage -le -5 ]; then
  # Dev set
  # Remove the feature/CMVN lists that were just copied into data-fb/dev, so
  # the fbank lists are generated from scratch.  (This previously removed the
  # files under $train, which is the wrong directory at this point.)
  utils/copy_data_dir.sh data/dev data-fb/dev || exit 1; rm data-fb/dev/{cmvn,feats}.scp
  steps/make_fbank.sh --nj 10 --cmd "$train_cmd" \
     data-fb/dev data-fb/dev/log data-fb/dev/data || exit 1;
  steps/compute_cmvn_stats.sh data-fb/dev data-fb/dev/log data-fb/dev/data || exit 1;
  # Training set
  utils/copy_data_dir.sh $train_original $train || exit 1; rm $train/{cmvn,feats}.scp
  steps/make_fbank.sh --nj 10 --cmd "$train_cmd" \
     $train $train/log $train/data || exit 1;
  steps/compute_cmvn_stats.sh $train $train/log $train/data || exit 1;
fi

(
  # Train only if no final model is present yet; decoding always runs.
  if [ ! -f $dir/final.mdl ]; then
    steps/nnet2/train_convnet_accel2.sh --parallel-opts "$parallel_opts" \
      --cmd "$decode_cmd" --stage $stage \
      --num-threads 1 --minibatch-size 512 \
      --mix-up 20000 --samples-per-iter 300000 \
      --num-epochs 15 --delta-order 2 \
      --initial-effective-lrate 0.0005 --final-effective-lrate 0.000025 \
      --num-jobs-initial 3 --num-jobs-final 8 --splice-width 5 \
      --hidden-dim 2000 --num-filters1 128 --patch-dim1 7 --pool-size 3 \
      --num-filters2 256 --patch-dim2 4 \
      $train data/lang exp/tri5a_ali $dir || exit 1;
  fi

  steps/nnet2/decode.sh --cmd "$decode_cmd" --nj 10 \
    --config conf/decode.config \
    exp/tri5a/graph data-fb/dev \
    $dir/decode || exit 1;
)
egs/wsj/s5/steps/nnet2/decode.sh
View file @
5526c218
...
...
@@ -84,7 +84,12 @@ fi
splice_opts
=
`
cat
$srcdir
/splice_opts 2>/dev/null
`
case
$feat_type
in
raw
)
feats
=
"ark,s,cs:apply-cmvn
$cmvn_opts
--utt2spk=ark:
$sdata
/JOB/utt2spk scp:
$sdata
/JOB/cmvn.scp scp:
$sdata
/JOB/feats.scp ark:- |"
;;
raw
)
feats
=
"ark,s,cs:apply-cmvn
$cmvn_opts
--utt2spk=ark:
$sdata
/JOB/utt2spk scp:
$sdata
/JOB/cmvn.scp scp:
$sdata
/JOB/feats.scp ark:- |"
if
[
-f
$srcdir
/delta_order
]
;
then
delta_order
=
`
cat
$srcdir
/delta_order 2>/dev/null
`
feats
=
"
$feats
add-deltas --delta-order=
$delta_order
ark:- ark:- |"
fi
;;
lda
)
feats
=
"ark,s,cs:apply-cmvn
$cmvn_opts
--utt2spk=ark:
$sdata
/JOB/utt2spk scp:
$sdata
/JOB/cmvn.scp scp:
$sdata
/JOB/feats.scp ark:- | splice-feats
$splice_opts
ark:- ark:- | transform-feats
$srcdir
/final.mat ark:- ark:- |"
;;
*
)
echo
"
$0
: invalid feature type
$feat_type
"
&&
exit
1
;
...
...
egs/wsj/s5/steps/nnet2/train_convnet_accel2.sh
0 → 100755
View file @
5526c218
This diff is collapsed.
Click to expand it.
src/cudamatrix/cu-kernels-ansi.h
View file @
5526c218
...
...
@@ -62,6 +62,7 @@ void cudaF_apply_pow_abs(dim3 Gr, dim3 Bl, float* mat, float power, bool include
void
cudaF_apply_heaviside
(
dim3
Gr
,
dim3
Bl
,
float
*
mat
,
MatrixDim
d
);
void
cudaF_apply_floor
(
dim3
Gr
,
dim3
Bl
,
float
*
mat
,
float
floor_val
,
MatrixDim
d
);
void
cudaF_copy_cols
(
dim3
Gr
,
dim3
Bl
,
float
*
dst
,
const
float
*
src
,
const
MatrixIndexT_cuda
*
reorder
,
MatrixDim
dst_dim
,
int
src_stride
);
void
cudaF_add_cols
(
dim3
Gr
,
dim3
Bl
,
float
*
dst
,
const
float
*
src
,
const
MatrixIndexT_cuda
*
reorder
,
MatrixDim
dst_dim
,
int
src_stride
);
void
cudaF_copy_rows
(
dim3
Gr
,
dim3
Bl
,
float
*
dst
,
const
float
*
src
,
const
MatrixIndexT_cuda
*
reorder
,
MatrixDim
dst_dim
,
int
src_stride
);
void
cudaF_apply_ceiling
(
dim3
Gr
,
dim3
Bl
,
float
*
mat
,
float
ceiling_val
,
MatrixDim
d
);
void
cudaF_set_diag
(
int
Gr
,
int
Bl
,
float
*
mat
,
float
value
,
MatrixDim
d
);
...
...
@@ -190,6 +191,7 @@ void cudaD_apply_pow_abs(dim3 Gr, dim3 Bl, double* mat, double power, bool inclu
// Prototypes of double-precision kernel launchers.  For the dim3 variants,
// Gr and Bl are presumably the CUDA grid and block dimensions of the launch
// (the visible cudaD_add_cols definition uses them that way).
void cudaD_apply_heaviside(dim3 Gr, dim3 Bl, double* mat, MatrixDim d);
void cudaD_apply_floor(dim3 Gr, dim3 Bl, double* mat, double floor_val, MatrixDim d);
void cudaD_copy_cols(dim3 Gr, dim3 Bl, double* dst, const double* src,
                     const MatrixIndexT_cuda* reorder, MatrixDim dst_dim, int src_stride);
void cudaD_add_cols(dim3 Gr, dim3 Bl, double* dst, const double* src,
                    const MatrixIndexT_cuda* reorder, MatrixDim dst_dim, int src_stride);
void cudaD_copy_rows(dim3 Gr, dim3 Bl, double* dst, const double* src,
                     const MatrixIndexT_cuda* reorder, MatrixDim dst_dim, int src_stride);
void cudaD_apply_ceiling(dim3 Gr, dim3 Bl, double* mat, double ceiling_val, MatrixDim d);
void cudaD_set_diag(int Gr, int Bl, double* mat, double value, MatrixDim d);
...
...
src/cudamatrix/cu-kernels.cu
View file @
5526c218
...
...
@@ -1259,6 +1259,25 @@ static void _copy_cols(Real* dst, const Real *src, const MatrixIndexT_cuda* reor
}
}
// Column-wise accumulation: for each in-range element (row, col) of dst,
// adds src(row, reorder[col]) to it.  A negative reorder entry means
// "skip": that destination column is left untouched.
template<typename Real>
__global__
static void _add_cols(Real* dst, const Real *src, const MatrixIndexT_cuda* reorder,
                      MatrixDim dst_dim, int src_stride) {
  // Note: in this kernel, the x dimension corresponds to rows and the y to columns,
  // as it will be going forward.
  const int row = blockIdx.x * blockDim.x + threadIdx.x;
  const int col = blockIdx.y * blockDim.y + threadIdx.y;

  // Threads that fall outside the destination matrix have nothing to do.
  if (row >= dst_dim.rows || col >= dst_dim.cols) return;

  const int src_col = reorder[col];
  if (src_col < 0) return;  // negative index: leave this column as it is.

  // dst and src may have different row strides, so index them separately.
  dst[row * dst_dim.stride + col] += src[row * src_stride + src_col];
}
template
<
typename
Real
>
__global__
static
void
_copy_rows
(
Real
*
dst
,
const
Real
*
src
,
const
MatrixIndexT_cuda
*
reorder
,
MatrixDim
dst_dim
,
int
src_stride
)
{
...
...
@@ -2024,6 +2043,10 @@ void cudaF_copy_cols(dim3 Gr, dim3 Bl, float* dst, const float* src, const Matri
_copy_cols
<<<
Gr
,
Bl
>>>
(
dst
,
src
,
reorder
,
dst_dim
,
src_stride
);
}
// Launches the float instantiation of the _add_cols kernel with grid Gr and
// block Bl, passing the remaining arguments through unchanged.
void cudaF_add_cols(dim3 Gr, dim3 Bl, float* dst, const float* src,
                    const MatrixIndexT_cuda* reorder, MatrixDim dst_dim,
                    int src_stride) {
  _add_cols<<<Gr, Bl>>>(dst, src, reorder, dst_dim, src_stride);
}
// Launches the float instantiation of the _copy_rows kernel with grid Gr and
// block Bl, passing the remaining arguments through unchanged.
void cudaF_copy_rows(dim3 Gr, dim3 Bl, float* dst, const float* src,
                     const MatrixIndexT_cuda* reorder, MatrixDim dst_dim,
                     int src_stride) {
  _copy_rows<<<Gr, Bl>>>(dst, src, reorder, dst_dim, src_stride);
}
...
...
@@ -2445,6 +2468,10 @@ void cudaD_copy_cols(dim3 Gr, dim3 Bl, double* dst, const double* src, const Mat
_copy_cols
<<<
Gr
,
Bl
>>>
(
dst
,
src
,
reorder
,
dst_dim
,
src_stride
);
}
// Launches the double instantiation of the _add_cols kernel with grid Gr and
// block Bl, passing the remaining arguments through unchanged.
void cudaD_add_cols(dim3 Gr, dim3 Bl, double* dst, const double* src,
                    const MatrixIndexT_cuda* reorder, MatrixDim dst_dim,
                    int src_stride) {
  _add_cols<<<Gr, Bl>>>(dst, src, reorder, dst_dim, src_stride);
}
// Launches the double instantiation of the _copy_rows kernel with grid Gr and
// block Bl, passing the remaining arguments through unchanged.
void cudaD_copy_rows(dim3 Gr, dim3 Bl, double* dst, const double* src,
                     const MatrixIndexT_cuda* reorder, MatrixDim dst_dim,
                     int src_stride) {
  _copy_rows<<<Gr, Bl>>>(dst, src, reorder, dst_dim, src_stride);
}
...
...
src/cudamatrix/cu-kernels.h
View file @
5526c218
...
...
@@ -92,6 +92,9 @@ inline void cuda_apply_ceiling(dim3 Gr, dim3 Bl, float* mat, float ceiling_val,
// Float overload: forwards all arguments to cudaF_copy_cols, so templated
// callers can use one name for both precisions.
inline void cuda_copy_cols(dim3 Gr, dim3 Bl, float* dst, const float* src,
                           const MatrixIndexT_cuda* reorder, MatrixDim dst_dim,
                           int src_stride) {
  cudaF_copy_cols(Gr, Bl, dst, src, reorder, dst_dim, src_stride);
}
// Float overload: forwards all arguments to cudaF_add_cols, so templated
// callers can use one name for both precisions.
inline void cuda_add_cols(dim3 Gr, dim3 Bl, float* dst, const float* src,
                          const MatrixIndexT_cuda* reorder, MatrixDim dst_dim,
                          int src_stride) {
  cudaF_add_cols(Gr, Bl, dst, src, reorder, dst_dim, src_stride);
}
// Float overload: forwards all arguments to cudaF_copy_rows, so templated
// callers can use one name for both precisions.
inline void cuda_copy_rows(dim3 Gr, dim3 Bl, float* dst, const float* src,
                           const MatrixIndexT_cuda* reorder, MatrixDim dst_dim,
                           int src_stride) {
  cudaF_copy_rows(Gr, Bl, dst, src, reorder, dst_dim, src_stride);
}
...
...
@@ -259,6 +262,9 @@ inline void cuda_apply_ceiling(dim3 Gr, dim3 Bl, double* mat, double ceiling_val
// Double overload: forwards all arguments to cudaD_copy_cols, so templated
// callers can use one name for both precisions.
inline void cuda_copy_cols(dim3 Gr, dim3 Bl, double* dst, const double* src,
                           const MatrixIndexT_cuda* reorder, MatrixDim dst_dim,
                           int src_stride) {
  cudaD_copy_cols(Gr, Bl, dst, src, reorder, dst_dim, src_stride);
}
// Double overload: forwards all arguments to cudaD_add_cols, so templated
// callers can use one name for both precisions.
inline void cuda_add_cols(dim3 Gr, dim3 Bl, double* dst, const double* src,
                          const MatrixIndexT_cuda* reorder, MatrixDim dst_dim,
                          int src_stride) {
  cudaD_add_cols(Gr, Bl, dst, src, reorder, dst_dim, src_stride);
}
// Double overload: forwards all arguments to cudaD_copy_rows, so templated
// callers can use one name for both precisions.
inline void cuda_copy_rows(dim3 Gr, dim3 Bl, double* dst, const double* src,
                           const MatrixIndexT_cuda* reorder, MatrixDim dst_dim,
                           int src_stride) {
  cudaD_copy_rows(Gr, Bl, dst, src, reorder, dst_dim, src_stride);
}
...
...
src/cudamatrix/cu-matrix-test.cc
View file @
5526c218
...
...
@@ -509,6 +509,36 @@ static void UnitTestCuMatrixCopyCols() {
}
// Checks CuMatrixBase::AddCols against an element-wise reference.  Each of the
// two trials randomly picks either the std::vector or the CuArray overload.
template<typename Real>
static void UnitTestCuMatrixAddCols() {
  for (MatrixIndexT p = 0; p < 2; p++) {
    MatrixIndexT num_cols1 = 10 + Rand() % 10,
        num_cols2 = 10 + Rand() % 10,
        num_rows = 10 + Rand() % 10;
    CuMatrix<Real> M(num_rows, num_cols1);
    M.SetRandn();

    // Start from a non-zero destination.  With an all-zero N, adding a column
    // gives the same result as copying it, so the test could not tell AddCols
    // apart from CopyCols.
    CuMatrix<Real> N(num_rows, num_cols2);
    N.SetRandn();
    CuMatrix<Real> O(N);  // reference result, starts as a copy of N.

    // Source column for each destination column; -1 means "skip".
    std::vector<int32> reorder(num_cols2);
    for (int32 i = 0; i < num_cols2; i++)
      reorder[i] = -1 + (Rand() % (num_cols1 + 1));

    if (Rand() % 2 == 0) {
      N.AddCols(M, reorder);
    } else {
      CuArray<int32> cuda_reorder(reorder);
      N.AddCols(M, cuda_reorder);
    }

    // Reference: accumulate the selected source column, leave skipped
    // columns at their initial value.
    for (int32 i = 0; i < num_rows; i++)
      for (int32 j = 0; j < num_cols2; j++)
        if (reorder[j] >= 0)
          O(i, j) = static_cast<Real>(O(i, j)) +
              static_cast<Real>(M(i, reorder[j]));
    AssertEqual(N, O);
  }
}
template
<
typename
Real
>
static
void
UnitTestCuMatrixApplyFloor
()
{
...
...
@@ -2093,6 +2123,7 @@ template<typename Real> void CudaMatrixUnitTest() {
UnitTestCuMatrixCopyFromTp
<
Real
>
();
UnitTestCuMatrixAddMatTp
<
Real
>
();
UnitTestCuMatrixCopyCols
<
Real
>
();
UnitTestCuMatrixAddCols
<
Real
>
();
UnitTestCuMatrixSumColumnRanges
<
Real
>
();
UnitTestCuMatrixCopyRows
<
Real
>
();
UnitTestCuMatrixCopyRowsFromVec
<
Real
>
();
...
...
src/cudamatrix/cu-matrix.cc
View file @
5526c218
...
...
@@ -1960,6 +1960,56 @@ void CuMatrixBase<Real>::CopyCols(const CuMatrixBase<Real> &src,
}
}
// Adds column reorder[c] of src to column c of *this; an entry of -1 leaves
// column c unchanged.  With CUDA enabled the index vector is first copied
// into a CuArray and the cuda_add_cols kernel is launched; otherwise the call
// is forwarded to AddCols on the underlying Mat().
template<typename Real>
void CuMatrixBase<Real>::AddCols(const CuMatrixBase<Real> &src,
                                 const std::vector<MatrixIndexT> &reorder) {
#if HAVE_CUDA == 1
  if (CuDevice::Instantiate().Enabled()) {
    KALDI_ASSERT(static_cast<MatrixIndexT>(reorder.size()) == NumCols());
    KALDI_ASSERT(NumRows() == src.NumRows());
#ifdef KALDI_PARANOID
    // Every index must be -1 (skip) or a valid column of src.
    const MatrixIndexT src_num_cols = src.NumCols();
    for (size_t k = 0; k < reorder.size(); k++)
      KALDI_ASSERT(reorder[k] >= -1 && reorder[k] < src_num_cols);
#endif
    CuArray<MatrixIndexT> device_reorder(reorder);

    // The timer is created after device_reorder has been constructed.
    Timer timer;
    // This kernel, as it is newer has the (x,y) dims as (rows,cols).
    dim3 block_dim(CU2DBLOCK, CU2DBLOCK);
    dim3 grid_dim(n_blocks(NumRows(), CU2DBLOCK),
                  n_blocks(NumCols(), CU2DBLOCK));
    cuda_add_cols(grid_dim, block_dim, data_, src.Data(),
                  device_reorder.Data(), Dim(), src.Stride());
    CU_SAFE_CALL(cudaGetLastError());

    CuDevice::Instantiate().AccuProfile(__func__, timer.Elapsed());
  } else
#endif
  {
    Mat().AddCols(src.Mat(), reorder);
  }
}
// Overload of AddCols taking the column indices as a CuArray.  With CUDA
// enabled the array's data pointer is handed straight to the cuda_add_cols
// kernel; otherwise the indices are copied into a std::vector and the call is
// forwarded to AddCols on the underlying Mat().
template<typename Real>
void CuMatrixBase<Real>::AddCols(const CuMatrixBase<Real> &src,
                                 const CuArray<MatrixIndexT> &reorder) {
#if HAVE_CUDA == 1
  if (CuDevice::Instantiate().Enabled()) {
    KALDI_ASSERT(reorder.Dim() == NumCols());
    KALDI_ASSERT(NumRows() == src.NumRows());

    Timer timer;
    // This kernel, as it is newer has the (x,y) dims as (rows,cols).
    dim3 block_dim(CU2DBLOCK, CU2DBLOCK);
    dim3 grid_dim(n_blocks(NumRows(), CU2DBLOCK),
                  n_blocks(NumCols(), CU2DBLOCK));
    cuda_add_cols(grid_dim, block_dim, data_, src.Data(),
                  reorder.Data(), Dim(), src.Stride());
    CU_SAFE_CALL(cudaGetLastError());

    CuDevice::Instantiate().AccuProfile(__func__, timer.Elapsed());
  } else
#endif
  {
    // No device in use: fetch the indices into a std::vector first.
    std::vector<MatrixIndexT> host_reorder;
    reorder.CopyToVec(&host_reorder);
    Mat().AddCols(src.Mat(), host_reorder);
  }
}
template
<
typename
Real
>
void
CuMatrixBase
<
Real
>::
CopyRows
(
const
CuMatrixBase
<
Real
>
&
src
,
...
...
src/cudamatrix/cu-matrix.h
View file @
5526c218
...
...
@@ -98,6 +98,18 @@ class CuMatrixBase {
void
CopyCols
(
const
CuMatrixBase
<
Real
>
&
src
,
const
CuArray
<
MatrixIndexT
>
&
indices
);
/// Add column indices[r] of src to column r.
/// As a special case, if indices[r] == -1, skip column r.
/// indices.size() must equal this->NumCols(),
/// all elements of "indices" must be in [-1, src.NumCols()-1],
/// and src.NumRows() must equal this->NumRows()
void
AddCols
(
const
CuMatrixBase
<
Real
>
&
src
,
const
std
::
vector
<
MatrixIndexT
>
&
indices
);
/// Version of AddCols that takes CuArray argument.
void
AddCols
(
const
CuMatrixBase
<
Real
>
&
src
,
const
CuArray
<
MatrixIndexT
>
&
indices
);
/// Copies row r from row indices[r] of src.
/// As a special case, if indexes[i] <== -1, sets row i to zero
...
...
src/matrix/kaldi-matrix.cc
View file @
5526c218
...
...
@@ -2566,6 +2566,34 @@ void MatrixBase<Real>::CopyCols(const MatrixBase<Real> &src,
}
}
// Adds column indices[c] of src to column c of *this, for every row.
// A negative index leaves column c unchanged.  Requires that src has the same
// number of rows as *this and that indices has one entry per column of *this.
template<typename Real>
void MatrixBase<Real>::AddCols(const MatrixBase<Real> &src,
                               const std::vector<MatrixIndexT> &indices) {
  KALDI_ASSERT(NumRows() == src.NumRows());
  KALDI_ASSERT(NumCols() == static_cast<MatrixIndexT>(indices.size()));
#ifdef KALDI_PARANOID
  // Every index must be -1 (skip) or a valid column of src.
  const MatrixIndexT src_num_cols = src.NumCols();
  for (typename std::vector<MatrixIndexT>::const_iterator it = indices.begin();
       it != indices.end(); ++it)
    KALDI_ASSERT(*it >= -1 && *it < src_num_cols);
#endif

  // For the sake of memory locality we do this row by row, walking one row
  // pointer through each matrix.
  Real *dst_row = this->data_;
  const Real *src_row = src.data_;
  for (MatrixIndexT row = 0; row < num_rows_; row++) {
    for (MatrixIndexT col = 0; col < num_cols_; col++) {
      const MatrixIndexT src_col = indices[col];
      if (src_col < 0) continue;  // negative index: skip this column.
      dst_row[col] += src_row[src_col];
    }
    // The two matrices may have different strides.
    dst_row += stride_;
    src_row += src.stride_;
  }
}
template
<
typename
Real
>
void
MatrixBase
<
Real
>::
CopyRows
(
const
MatrixBase
<
Real
>
&
src
,
const
std
::
vector
<
MatrixIndexT
>
&
indices
)
{
...
...
src/matrix/kaldi-matrix.h
View file @
5526c218
...
...
@@ -284,6 +284,14 @@ class MatrixBase {
void
CopyRows
(
const
MatrixBase
<
Real
>
&
src
,
const
std
::
vector
<
MatrixIndexT
>
&
indices
);
/// Add column indices[r] of src to column r.
/// As a special case, if indices[r] == -1, skip column r.
/// indices.size() must equal this->NumCols(),
/// all elements of "indices" must be in [-1, src.NumCols()-1],
/// and src.NumRows() must equal this->NumRows()
void
AddCols
(
const
MatrixBase
<
Real
>
&
src
,
const
std
::
vector
<
MatrixIndexT
>
&
indices
);
/// Applies floor to all matrix elements
void
ApplyFloor
(
Real
floor_val
);
...
...
src/nnet2/nnet-component-test.cc
View file @
5526c218
...
...
@@ -307,6 +307,31 @@ void UnitTestPnormComponent() {
}
}
void
UnitTestMaxpoolingComponent
()
{
// works if it has an initializer from int,
// e.g. tanh, sigmoid.
// We're testing that the gradients are computed correctly:
// the input gradients and the model gradients.
for
(
int32
i
=
0
;
i
<
5
;
i
++
)
{
int32
pool_stride
=
5
+
Rand
()
%
10
,
pool_size
=
2
+
Rand
()
%
3
,
num_pools
=
1
+
Rand
()
%
10
;
int32
output_dim
=
num_pools
*
pool_stride
;
int32
num_patches
=
num_pools
*
pool_size
;
int32
input_dim
=
pool_stride
*
num_patches
;
MaxpoolingComponent
component
(
input_dim
,
output_dim
,
pool_size
,
pool_stride
);
UnitTestGenericComponentInternal
(
component
);
}
{
MaxpoolingComponent
component
;
component
.
InitFromString
(
"input-dim=192 output-dim=64 pool-size=3 pool-stride=16"
);
UnitTestGenericComponentInternal
(
component
);
}
}
void
UnitTestAffineComponent
()
{
...
...
@@ -337,6 +362,44 @@ void UnitTestAffineComponent() {
}
}
// Runs the generic component check on Convolutional1dComponent.  The first
// part initializes from explicit hyper-parameters and, at random, additionally
// re-initializes from a matrix written to a temporary file; the second part
// initializes from a configuration string.
void UnitTestConvolutional1dComponent() {
  BaseFloat learning_rate = 0.01,
      param_stddev = 0.1, bias_stddev = 1.0;
  int32 patch_stride = 10, patch_step = 1, patch_dim = 4;
  int32 num_patches = 1 + (patch_stride - patch_dim) / patch_step;
  int32 num_splice = 5 + Rand() % 10, num_filters = 5 + Rand() % 10;
  int32 input_dim = patch_stride * num_splice;
  int32 filter_dim = patch_dim * num_splice;
  int32 output_dim = num_patches * num_filters;

  {
    Convolutional1dComponent component;
    const bool reload_from_file = (Rand() % 2 != 0);

    // initialize the hyper-parameters (needed on both paths).
    component.Init(learning_rate, input_dim, output_dim,
                   patch_dim, patch_step, patch_stride,
                   param_stddev, bias_stddev);

    if (reload_from_file) {
      // One row per filter; the matrix has filter_dim + 1 columns.
      Matrix<BaseFloat> mat(num_filters, filter_dim + 1);
      mat.SetRandn();
      mat.Scale(param_stddev);
      WriteKaldiObject(mat, "tmpf", true);
      Sleep(0.5);
      component.Init(learning_rate, "tmpf");
      unlink("tmpf");
    }
    UnitTestGenericComponentInternal(component);
  }

  {
    const char *config = "learning-rate=0.01 input-dim=100 output-dim=70 param-stddev=0.1 patch-dim=4 patch-step=1 patch-stride=10";
    Convolutional1dComponent component;
    component.InitFromString(config);
    UnitTestGenericComponentInternal(component);
  }
}
void
UnitTestDropoutComponent
()
{
// We're testing that the gradients are computed correctly:
// the input gradients and the model gradients.
...
...
@@ -812,6 +875,7 @@ int main() {
UnitTestSpliceComponent
();
UnitTestMaxoutComponent
();
UnitTestPnormComponent
();
UnitTestMaxpoolingComponent
();
UnitTestGenericComponent
<
NormalizeComponent
>
();
UnitTestSigmoidComponent
();
UnitTestAffineComponent
();
...
...
@@ -826,6 +890,7 @@ int main() {
UnitTestFixedBiasComponent
();
UnitTestAffineComponentPreconditioned
();
UnitTestAffineComponentPreconditionedOnline
();
UnitTestConvolutional1dComponent
();
UnitTestDropoutComponent
();
UnitTestAdditiveNoiseComponent
();
UnitTestParsing
();
...
...
src/nnet2/nnet-component.cc
View file @
5526c218
This diff is collapsed.
Click to expand it.
src/nnet2/nnet-component.h
View file @
5526c218
...
...
@@ -448,6 +448,69 @@ class MaxoutComponent: public Component {
int32
output_dim_
;
};
/**
 * MaxpoolingComponent :
 * Max-pooling was first used in ConvNets to select a representative
 * activation within an area.  It inspired the Maxout nonlinearity.
 *
 * The input/output matrices are split into sub-matrices of width
 * 'pool_stride_'.  For instance, a minibatch of 512 frames propagated by a
 * convolutional layer gives a 512 x 3840 input matrix for
 * MaxpoolingComponent, composed of 128 feature maps per frame (128 x 30).
 * For a 3-to-1 max-pooling on each feature map, set 'pool_stride_' to 128
 * and 'pool_size_' to 3.  The component then creates an output matrix of
 * 512 x 1280: the 30 input neurons are grouped with a group size of 3 and the
 * maximum of each group is selected, giving a smaller feature map of 10.
 *
 * Our pooling does not support overlaps, which simplifies the
 * implementation (and was not helpful for Ossama).
 */
class MaxpoolingComponent: public Component {
 public:
  void Init(int32 input_dim, int32 output_dim,
            int32 pool_size, int32 pool_stride);

  /// Constructs the component by passing all four values to Init().
  explicit MaxpoolingComponent(int32 input_dim, int32 output_dim,
                               int32 pool_size, int32 pool_stride) {
    Init(input_dim, output_dim, pool_size, pool_stride);
  }

  /// Default constructor: all dimensions are zero until an Init/Read call.
  MaxpoolingComponent(): input_dim_(0), output_dim_(0),
                         pool_size_(0), pool_stride_(0) { }

  virtual std::string Type() const { return "MaxpoolingComponent"; }
  virtual void InitFromString(std::string args);
  virtual int32 InputDim() const { return input_dim_; }
  virtual int32 OutputDim() const { return output_dim_; }

  using Component::Propagate;  // to avoid name hiding
  virtual void Propagate(const ChunkInfo &in_info,
                         const ChunkInfo &out_info,
                         const CuMatrixBase<BaseFloat> &in,
                         CuMatrixBase<BaseFloat> *out) const;
  virtual void Backprop(const ChunkInfo &in_info,
                        const ChunkInfo &out_info,
                        const CuMatrixBase<BaseFloat> &in_value,
                        const CuMatrixBase<BaseFloat> &,  //out_value,
                        const CuMatrixBase<BaseFloat> &out_deriv,
                        Component *to_update,  // may be identical to "this".
                        CuMatrix<BaseFloat> *in_deriv) const;
  virtual bool BackpropNeedsInput() const { return true; }
  virtual bool BackpropNeedsOutput() const { return true; }

  /// Returns a new component built from this one's four dimension values.
  virtual Component* Copy() const {
    return new MaxpoolingComponent(input_dim_, output_dim_,
                                   pool_size_, pool_stride_);
  }

  virtual void Read(std::istream &is, bool binary);  // This Read function
  // requires that the Component has the correct type.

  /// Write component to stream
  virtual void Write(std::ostream &os, bool binary) const;
  virtual std::string Info() const;

 protected:
  int32 input_dim_;
  int32 output_dim_;
  int32 pool_size_;
  int32 pool_stride_;
};
class
PnormComponent
:
public
Component
{
public:
void
Init
(
int32
input_dim
,
int32
output_dim
,
BaseFloat
p
);
...
...
@@ -1613,6 +1676,122 @@ class AdditiveNoiseComponent: public RandomComponent {
BaseFloat
stddev_
;
};
/**
 * Convolutional1dComponent implements convolution over frequency axis.
 * We assume the input features are spliced, i.e. each frame is in
 * fact a set of stacked frames, where we can form patches which span
 * over several frequency bands and whole time axis. A patch is the
 * instance of a filter on a group of frequency bands and whole time
 * axis. Shifts of the filter generate patches.
 *
 * The convolution is done over whole axis with same filter
 * coefficients, i.e. we don't use separate filters for different
 * 'regions' of frequency axis. Due to convolution, same weights are
 * used repeatedly, the final gradient is a sum of all
 * position-specific gradients (the sum was found better than
 * averaging).
 *
 * In order to have a fast implementation, the filters are
 * represented in vectorized form, where each rectangular filter
 * corresponds to a row in a matrix, where all the filters are
 * stored. The features are then re-shaped to a set of matrices, where
 * one matrix corresponds to single patch-position, where all the
 * filters get applied.
 *
 * The type of convolution is controlled by hyperparameters:
 * patch_dim_     ... frequency axis size of the patch
 * patch_step_    ... size of shift in the convolution
 * patch_stride_  ... shift for 2nd dim of a patch
 *                    (i.e. frame length before splicing)
 * For instance, for a convolutional component after raw input,
 * if the input is 36-dim fbank feature with delta of order 2
 * and spliced using +/- 5 frames of contexts, the convolutional
 * component takes the input as a 36 x 33 image. The patch_stride_
 * should be configured 36. If patch_step_ and patch_dim_ are
 * configured 1 and 7, the Convolutional1dComponent creates a
 * 2D filter of 7 x 33, such that the convolution is actually done
 * only along the frequency axis. Specifically, the convolutional
 * output along the frequency axis is (36 - 7) / 1 + 1 = 30, and
 * the convolutional output along the temporal axis is 33 - 33 + 1 = 1,
 * resulting in an output image of 30 x 1, which is called a feature map
 * in ConvNet. Then if the output-dim is set 3840, the constructor
 * would know there should be 3840 / 30 = 128 distinct filters,
 * which will create 128 feature maps of 30 x 1 for one frame of
 * input. The feature maps are vectorized as a 3840-dim row vector
 * in the output matrix of this component. For details on propagation
 * of Convolutional1dComponent, check the function definition.
 *
 */
class Convolutional1dComponent: public UpdatableComponent {
 public:
  Convolutional1dComponent();
  // constructor using another component
  Convolutional1dComponent(const Convolutional1dComponent &component);
  // constructor using parameters
  Convolutional1dComponent(const CuMatrixBase<BaseFloat> &filter_params,
                           const CuVectorBase<BaseFloat> &bias_params,
                           BaseFloat learning_rate);

  int32 InputDim() const;
  int32 OutputDim() const;
  void Init(BaseFloat learning_rate, int32 input_dim, int32 output_dim,
            int32 patch_dim, int32 patch_step, int32 patch_stride,
            BaseFloat param_stddev, BaseFloat bias_stddev);
  void Init(BaseFloat learning_rate, std::string matrix_filename);

  // resize the component, setting the parameters to zero, while
  // leaving any other configuration values the same
  void Resize(int32 input_dim, int32 output_dim);
  std::string Info() const;
  void InitFromString(std::string args);
  std::string Type() const { return "Convolutional1dComponent"; }
  bool BackpropNeedsInput() const { return true; }
  bool BackpropNeedsOutput() const { return false; }

  using Component::Propagate;  // to avoid name hiding
  void Propagate(const ChunkInfo &in_info,
                 const ChunkInfo &out_info,
                 const CuMatrixBase<BaseFloat> &in,
                 CuMatrixBase<BaseFloat> *out) const;
  void Scale(BaseFloat scale);
  virtual void Add(BaseFloat alpha, const UpdatableComponent &other);
  virtual void Backprop(const ChunkInfo &in_info,
                        const ChunkInfo &out_info,
                        const CuMatrixBase<BaseFloat> &in_value,
                        const CuMatrixBase<BaseFloat> &out_value,
                        const CuMatrixBase<BaseFloat> &out_deriv,
                        Component *to_update_in,
                        CuMatrix<BaseFloat> *in_deriv) const;
  void SetZero(bool treat_as_gradient);
  void Read(std::istream &is, bool binary);
  void Write(std::ostream &os, bool binary) const;
  virtual BaseFloat DotProduct(const UpdatableComponent &other) const;
  Component* Copy() const;
  void PerturbParams(BaseFloat stddev);
  void SetParams(const VectorBase<BaseFloat> &bias,
                 const MatrixBase<BaseFloat> &filter);

  // Read-only accessors.  They are const-qualified so that they can also be
  // called through a const Convolutional1dComponent; they return references
  // to the members and do not copy.
  const CuVector<BaseFloat> &BiasParams() const { return bias_params_; }
  const CuMatrix<BaseFloat> &LinearParams() const { return filter_params_; }

  int32 GetParameterDim() const;
  void Update(const CuMatrixBase<BaseFloat> &in_value,
              const CuMatrixBase<BaseFloat> &out_deriv);

 private:
  int32 patch_dim_;
  int32 patch_step_;
  int32 patch_stride_;

  static void ReverseIndexes(const std::vector<int32> &forward_indexes,
                             int32 input_dim,
                             std::vector<std::vector<int32> > *backward_indexes);
  static void RearrangeIndexes(const std::vector<std::vector<int32> > &in,
                               std::vector<std::vector<int32> > *out);

  const Convolutional1dComponent &operator = (const Convolutional1dComponent &other);  // Disallow.
  CuMatrix<BaseFloat> filter_params_;
  CuVector<BaseFloat> bias_params_;
  bool is_gradient_;
};
/// Functions used in Init routines. Suppose name=="foo", if "string" has a
/// field like foo=12, this function will set "param" to 12 and remove that
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment