Commit 8b60e01b authored by Sri Harish Mallidi

trunk,nnet1:

- faster cnn2d by pre-computing the scaling factors used on the gradients of cnn2d weights
- unit tests for all methods of the cnn2d, max-pool-2d, and avg-pool-2d components
- added a cnn2d recipe for Resource Management, local/nnet/run_cnn2d.sh
- bugfix in steps/nnet/make_priors.sh: import the cmvn option when preparing the feature-extraction pipeline

git-svn-id: https://svn.code.sf.net/p/kaldi/code/trunk@5165 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
parent 0b51c8c1
egs/rm/s5/local/nnet/run_cnn2d.sh (new file):
#!/bin/bash
. ./cmd.sh ## You'll want to change cmd.sh to something that will work on your system.
## This relates to the queue.
. ./path.sh ## Source the tools/utils (import the queue.pl)
dev=data-fbank/test
train=data-fbank/train
dev_original=data/test
train_original=data/train
gmm=exp/tri3b
stage=0
. utils/parse_options.sh
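# The stages below can be re-entered, e.g. './local/nnet/run_cnn2d.sh --stage 2'
# skips the feature extraction and CNN training and resumes at the RBM pre-training.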
# Make the FBANK features
if [ $stage -le 0 ]; then
  # Dev set
  utils/copy_data_dir.sh $dev_original $dev || exit 1; rm $dev/{cmvn,feats}.scp
  steps/make_fbank_pitch.sh --nj 10 --cmd "$train_cmd" \
     $dev $dev/log $dev/data || exit 1;
  steps/compute_cmvn_stats.sh $dev $dev/log $dev/data || exit 1;
  # Training set
  utils/copy_data_dir.sh $train_original $train || exit 1; rm $train/{cmvn,feats}.scp
  steps/make_fbank_pitch.sh --nj 10 --cmd "$train_cmd" \
     $train $train/log $train/data || exit 1;
  steps/compute_cmvn_stats.sh $train $train/log $train/data || exit 1;
  # Split the training set
  utils/subset_data_dir_tr_cv.sh --cv-spk-percent 10 $train ${train}_tr90 ${train}_cv10
fi
# Run the CNN pre-training.
if [ $stage -le 1 ]; then
  dir=exp/cnn2d4c
  ali=${gmm}_ali
  # Train
  $cuda_cmd $dir/log/train_nnet.log \
    steps/nnet/train.sh \
      --cmvn-opts "--norm-means=true --norm-vars=true" \
      --delta-opts "--delta-order=2" --splice 5 \
      --network-type cnn2d --cnn-proto-opts "--pool1-y-len=4 --pool1-y-step=4 --pitch-dim=3" \
      --hid-layers 2 --learn-rate 0.008 \
      ${train}_tr90 ${train}_cv10 data/lang $ali $ali $dir || exit 1;
  # Decode
  steps/nnet/decode.sh --nj 20 --cmd "$decode_cmd" --config conf/decode_dnn.config --acwt 0.2 \
    $gmm/graph $dev $dir/decode || exit 1;
  steps/nnet/decode.sh --nj 20 --cmd "$decode_cmd" --config conf/decode_dnn.config --acwt 0.2 \
    $gmm/graph_ug $dev $dir/decode_ug || exit 1;
fi
# Pre-train stack of RBMs on top of the convolutional layers (4 layers, 1024 units)
if [ $stage -le 2 ]; then
  dir=exp/cnn2d4c_pretrain-dbn
  transf_cnn=exp/cnn2d4c/final.feature_transform_cnn # transform with convolutional layers
  # Train
  $cuda_cmd $dir/log/pretrain_dbn.log \
    steps/nnet/pretrain_dbn.sh --nn-depth 4 --hid-dim 1024 --rbm-iter 20 \
      --feature-transform $transf_cnn --input-vis-type bern \
      --param-stddev-first 0.05 --param-stddev 0.05 \
      $train $dir || exit 1
fi
# Re-align using CNN
if [ $stage -le 3 ]; then
  dir=exp/cnn2d4c
  steps/nnet/align.sh --nj 20 --cmd "$train_cmd" \
    $train data/lang $dir ${dir}_ali || exit 1
fi
# Train the DNN optimizing cross-entropy.
if [ $stage -le 4 ]; then
  dir=exp/cnn2d4c_pretrain-dbn_dnn; [ ! -d $dir ] && mkdir -p $dir/log;
  ali=exp/cnn2d4c_ali
  feature_transform=exp/cnn2d4c/final.feature_transform
  feature_transform_dbn=exp/cnn2d4c_pretrain-dbn/final.feature_transform
  dbn=exp/cnn2d4c_pretrain-dbn/4.dbn
  cnn_dbn=$dir/cnn_dbn.nnet
  { # Concatenate CNN layers and DBN,
    num_components=$(nnet-info $feature_transform | grep -m1 num-components | awk '{print $2;}')
    nnet-concat "nnet-copy --remove-first-layers=$num_components $feature_transform_dbn - |" $dbn $cnn_dbn \
      2>$dir/log/concat_cnn_dbn.log || exit 1
  }
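  # $cnn_dbn now holds the convolutional layers followed by the 4 pre-trained RBM
  # layers: nnet-copy --remove-first-layers strips the plain feature-transform
  # components from $feature_transform_dbn, leaving just the CNN layers to prepend.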
  # Train
  $cuda_cmd $dir/log/train_nnet.log \
    steps/nnet/train.sh --feature-transform $feature_transform --dbn $cnn_dbn --hid-layers 0 \
      ${train}_tr90 ${train}_cv10 data/lang $ali $ali $dir || exit 1;
  # Decode (reuse HCLG graph)
  steps/nnet/decode.sh --nj 20 --cmd "$decode_cmd" --config conf/decode_dnn.config --acwt 0.2 \
    $gmm/graph $dev $dir/decode || exit 1;
  steps/nnet/decode.sh --nj 20 --cmd "$decode_cmd" --config conf/decode_dnn.config --acwt 0.2 \
    $gmm/graph_ug $dev $dir/decode_ug || exit 1;
fi
# Sequence training using the sMBR criterion; we do stochastic gradient descent
# with per-utterance updates. For RM a good acwt is 0.2.
dir=exp/cnn2d4c_pretrain-dbn_dnn_smbr
srcdir=exp/cnn2d4c_pretrain-dbn_dnn
acwt=0.2
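# The same acoustic scale is used for the denominator lattices, the sMBR updates and decoding.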
# First we generate lattices and alignments:
if [ $stage -le 5 ]; then
  steps/nnet/align.sh --nj 20 --cmd "$train_cmd" \
    $train data/lang $srcdir ${srcdir}_ali || exit 1;
  steps/nnet/make_denlats.sh --nj 20 --cmd "$decode_cmd" --config conf/decode_dnn.config --acwt $acwt \
    $train data/lang $srcdir ${srcdir}_denlats || exit 1;
fi
# Re-train the DNN by 6 iterations of sMBR
if [ $stage -le 6 ]; then
  steps/nnet/train_mpe.sh --cmd "$cuda_cmd" --num-iters 6 --acwt $acwt --do-smbr true \
    $train data/lang $srcdir ${srcdir}_ali ${srcdir}_denlats $dir || exit 1
  # Decode
  for ITER in 1 2 3 4 5 6; do
    steps/nnet/decode.sh --nj 20 --cmd "$decode_cmd" --config conf/decode_dnn.config \
      --nnet $dir/${ITER}.nnet --acwt $acwt \
      $gmm/graph $dev $dir/decode_it${ITER} || exit 1
  done
fi
echo Success
exit 0
steps/nnet/make_priors.sh:
@@ -57,7 +57,7 @@ D=$nndir
 feats="ark,s,cs:copy-feats scp:$sdata/JOB/feats.scp ark:- |"
 # apply-cmvn (optional),
 [ ! -z "$cmvn_opts" -a ! -f $sdata/1/cmvn.scp ] && echo "$0: Missing $sdata/1/cmvn.scp" && exit 1
-[ ! -z "$cmvn_opts" ] && feats="$feats apply-cmvn $cmvn_opts --utt2spk=ark:$sdata/utt2spk scp:$sdata/cmvn.scp ark:- ark:- |"
+[ ! -z "$cmvn_opts" ] && feats="$feats apply-cmvn $cmvn_opts --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp ark:- ark:- |"
 # add-deltas (optional),
 [ ! -z "$delta_opts" ] && feats="$feats add-deltas $delta_opts ark:- ark:- |"
 #
......
steps/nnet/train.sh:
@@ -356,7 +356,16 @@ if [[ -z "$nnet_init" && -z "$nnet_proto" ]]; then
         "$cnn_fea" $num_tgt $hid_layers $hid_dim >>$nnet_proto || exit 1
       ;;
     cnn2d)
-      #TODO, to be filled by Vijay...
+      delta_order=$([ -z $delta_opts ] && echo "0" || { echo $delta_opts | tr ' ' '\n' | grep "delta[-_]order" | sed 's:^.*=::'; })
+      echo "Debug : $delta_opts, delta_order $delta_order"
+      utils/nnet/make_cnn2d_proto.py $cnn_proto_opts \
+        --splice=$splice --delta-order=$delta_order --dir=$dir \
+        $num_fea >$nnet_proto || exit 1
+      cnn_fea=$(cat $nnet_proto | grep -v '^$' | tail -n1 | awk '{ print $5; }')
+      utils/nnet/make_nnet_proto.py $proto_opts \
+        --no-proto-head --no-smaller-input-weights \
+        ${bn_dim:+ --bottleneck-dim=$bn_dim} \
+        "$cnn_fea" $num_tgt $hid_layers $hid_dim >>$nnet_proto || exit 1
       ;;
     lstm)
       utils/nnet/make_lstm_proto.py $proto_opts \
......
utils/nnet/make_cnn2d_proto.py (new file):
#!/usr/bin/python
# Copyright 2014 Brno University of Technology (author: Karel Vesely)
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABILITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# Generated Nnet prototype, to be initialized by 'nnet-initialize'.
import math, random, sys, warnings
from optparse import OptionParser
###
### Parse options
###
usage="%prog [options] <feat-dim> <num-leaves> <num-hidden-layers> <num-hidden-neurons> >nnet-proto-file"
parser = OptionParser(usage)
parser.add_option('--activation-type', dest='activation_type',
help='Select type of activation function : (<Sigmoid>|<Tanh>) [default: %default]',
default='<Sigmoid>', type='string');
parser.add_option('--cnn1-num-filters', dest='cnn1_num_filters',
help='Number of filters in first convolutional layer [default: %default]',
default=128, type='int')
# this is given by splice
# parser.add_option('--cnn1-fmap-x-len', dest='cnn1_fmap_x_len',
# help='Size of cnn1-fmap-x-len [default: %default]',
# default=11, type='int')
# this should be equal to feat_raw_dim
# parser.add_option('--cnn1-fmap-y-len', dest='cnn1_fmap_y_len',
# help='Size of cnn1-fmap-y-len [default: %default]',
# default=32, type='int')
parser.add_option('--cnn1-filt-x-len', dest='cnn1_filt_x_len',
help='Size of cnn1-filt-x-len [default: %default]',
default=9, type='int')
parser.add_option('--cnn1-filt-y-len', dest='cnn1_filt_y_len',
help='Size of cnn1-filt-y-len [default: %default]',
default=9, type='int')
parser.add_option('--cnn1-filt-x-step', dest='cnn1_filt_x_step',
help='Size of cnn1-filt-x-step [default: %default]',
default=1, type='int')
parser.add_option('--cnn1-filt-y-step', dest='cnn1_filt_y_step',
help='Size of cnn1-filt-y-step [default: %default]',
default=1, type='int')
parser.add_option('--cnn1-connect-fmap', dest='cnn1_connect_fmap',
help='Size of cnn1-connect-fmap [default: %default]',
default=0, type='int')
parser.add_option('--pool1-x-len', dest='pool1_x_len',
help='Size of pool1-x-len [default: %default]',
default=1, type='int')
parser.add_option('--pool1-x-step', dest='pool1_x_step',
help='Size of pool1-x-step [default: %default]',
default=1, type='int')
#
parser.add_option('--pool1-y-len', dest='pool1_y_len',
help='Size of pool1-y-len [default: %default]',
default=3, type='int')
parser.add_option('--pool1-y-step', dest='pool1_y_step',
help='Size of pool1-y-step [default: %default]',
default=3, type='int')
parser.add_option('--pool1-type', dest='pool1_type',
help='Type of pooling (Max || Average) [default: %default]',
default='Max', type='string')
parser.add_option('--cnn2-num-filters', dest='cnn2_num_filters',
help='Number of filters in second convolutional layer [default: %default]',
default=256, type='int')
parser.add_option('--cnn2-filt-x-len', dest='cnn2_filt_x_len',
help='Size of cnn2-filt-x-len [default: %default]',
default=3, type='int')
parser.add_option('--cnn2-filt-y-len', dest='cnn2_filt_y_len',
help='Size of cnn2-filt-y-len [default: %default]',
default=4, type='int')
parser.add_option('--cnn2-filt-x-step', dest='cnn2_filt_x_step',
help='Size of cnn2-filt-x-step [default: %default]',
default=1, type='int')
parser.add_option('--cnn2-filt-y-step', dest='cnn2_filt_y_step',
help='Size of cnn2-filt-y-step [default: %default]',
default=1, type='int')
parser.add_option('--cnn2-connect-fmap', dest='cnn2_connect_fmap',
help='Size of cnn2-connect-fmap [default: %default]',
default=1, type='int')
parser.add_option('--pitch-dim', dest='pitch_dim',
help='Number of features representing pitch [default: %default]',
default=0, type='int')
parser.add_option('--delta-order', dest='delta_order',
help='Order of delta features [default: %default]',
default=2, type='int')
parser.add_option('--splice', dest='splice',
help='Length of splice [default: %default]',
default=5,type='int')
parser.add_option('--dir', dest='dirct',
help='Directory, where network prototypes will be saved [default: %default]',
default='.', type='string')
parser.add_option('--num-pitch-neurons', dest='num_pitch_neurons',
help='Number of neurons in layers processing pitch features [default: %default]',
default=200, type='int')
(o,args) = parser.parse_args()
if len(args) != 1:
  parser.print_help()
  sys.exit(1)
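# Example invocation (hypothetical dims: 40-band fbank + 3-dim pitch, splice 5,
# delta-order 2 => feat-dim = 43*3*11 = 1419), mirroring how steps/nnet/train.sh calls it:
#   utils/nnet/make_cnn2d_proto.py --splice=5 --delta-order=2 --pitch-dim=3 \
#     --pool1-y-len=4 --pool1-y-step=4 --dir=exp/cnn2d4c 1419 >nnet.proto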
feat_dim=int(args[0])
### End parse options
feat_raw_dim = feat_dim / (o.delta_order+1) / (o.splice*2+1) - o.pitch_dim # we need number of feats without deltas and splice and pitch
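# e.g. for the invocation above: feat_raw_dim = 1419/3/11 - 3 = 43 - 3 = 40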
o.cnn1_fmap_y_len = feat_raw_dim
o.cnn1_fmap_x_len = o.splice*2+1
# Check
assert(feat_dim > 0)
assert(o.pool1_type == 'Max' or o.pool1_type == 'Average')
## Extra checks that the dimensions match; if they do not,
## adjust the pooling step and produce a warning
# cnn1
assert( (o.cnn1_fmap_y_len - o.cnn1_filt_y_len) % o.cnn1_filt_y_step == 0 )
assert( (o.cnn1_fmap_x_len - o.cnn1_filt_x_len) % o.cnn1_filt_x_step == 0 )
# subsample1
cnn1_out_fmap_y_len=((1 + (o.cnn1_fmap_y_len - o.cnn1_filt_y_len) / o.cnn1_filt_y_step))
cnn1_out_fmap_x_len=((1 + (o.cnn1_fmap_x_len - o.cnn1_filt_x_len) / o.cnn1_filt_x_step))
# fix filt_step, so that the filter tiles the input exactly
def fix_filt_step(inp_len, filt_len, filt_step):
  if ((inp_len - filt_len) % filt_step == 0):
    return filt_step
  else:
    # try successively smaller steps; filt_step=1 always divides, so this returns
    for filt_step in xrange(filt_len, 0, -1):
      if ((inp_len - filt_len) % filt_step == 0):
        return filt_step
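# e.g. fix_filt_step(9, 4, 3) -> 1, since (9-4) is divisible by neither 4, 3 nor 2;
# a resulting step of 1 means no subsampling, which triggers the warnings below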
o.pool1_y_step = fix_filt_step(cnn1_out_fmap_y_len, o.pool1_y_len, o.pool1_y_step)
if o.pool1_y_step == 1 and o.pool1_y_len != 1:
  warnings.warn('WARNING: Choose different pool1_y_len as subsampling is not happening');
o.pool1_x_step = fix_filt_step(cnn1_out_fmap_x_len, o.pool1_x_len, o.pool1_x_step)
if o.pool1_x_step == 1 and o.pool1_x_len != 1:
  warnings.warn('WARNING: Choose different pool1_x_len as subsampling is not happening');
###
### Print prototype of the network
###
# Begin the prototype
print "<NnetProto>"
# Convolutional part of network
'''1st CNN layer'''
cnn1_input_dim=feat_raw_dim * (o.delta_order+1) * (o.splice*2+1)
cnn1_out_fmap_x_len=((1 + (o.cnn1_fmap_x_len - o.cnn1_filt_x_len) / o.cnn1_filt_x_step))
cnn1_out_fmap_y_len=((1 + (o.cnn1_fmap_y_len - o.cnn1_filt_y_len) / o.cnn1_filt_y_step))
cnn1_output_dim=o.cnn1_num_filters * cnn1_out_fmap_x_len * cnn1_out_fmap_y_len
'''1st Pooling layer'''
pool1_input_dim=cnn1_output_dim
pool1_fmap_x_len=cnn1_out_fmap_x_len
pool1_out_fmap_x_len=((1 + (pool1_fmap_x_len - o.pool1_x_len) / o.pool1_x_step))
pool1_fmap_y_len=cnn1_out_fmap_y_len
pool1_out_fmap_y_len=((1 + (pool1_fmap_y_len - o.pool1_y_len) / o.pool1_y_step))
pool1_output_dim=o.cnn1_num_filters*pool1_out_fmap_x_len*pool1_out_fmap_y_len
'''2nd CNN layer'''
cnn2_input_dim=pool1_output_dim
cnn2_fmap_x_len=pool1_out_fmap_x_len
cnn2_out_fmap_x_len=((1 + (cnn2_fmap_x_len - o.cnn2_filt_x_len) / o.cnn2_filt_x_step))
cnn2_fmap_y_len=pool1_out_fmap_y_len
cnn2_out_fmap_y_len=((1 + (cnn2_fmap_y_len - o.cnn2_filt_y_len) / o.cnn2_filt_y_step))
cnn2_output_dim=o.cnn2_num_filters * cnn2_out_fmap_x_len * cnn2_out_fmap_y_len
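# Dimension trace for the hypothetical config above (feat_raw_dim=40, splice=5,
# default 128/256 filters of 9x9 and 3x4, pool1-y-len=4, pool1-y-step=4):
#   cnn1:  fmap 11x40 -> out 3x32, cnn1_output_dim = 128*3*32 = 12288
#   pool1: 3x32 -> 3x8,            pool1_output_dim = 128*3*8 = 3072
#   cnn2:  fmap 3x8 -> out 1x5,    cnn2_output_dim = 256*1*5 = 1280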
convolution_proto = ''
convolution_proto += "<Convolutional2DComponent> <InputDim> %d <OutputDim> %d <FmapXLen> %d <FmapYLen> %d <FiltXLen> %d <FiltYLen> %d <FiltXStep> %d <FiltYStep> %d <ConnectFmap> %d <BiasMean> %f <BiasRange> %f <ParamStddev> %f\n" % \
( cnn1_input_dim, cnn1_output_dim, o.cnn1_fmap_x_len, o.cnn1_fmap_y_len, o.cnn1_filt_x_len, o.cnn1_filt_y_len, o.cnn1_filt_x_step, o.cnn1_filt_y_step, o.cnn1_connect_fmap, 0.0, 0.0, 0.01 )
convolution_proto += "<%sPooling2DComponent> <InputDim> %d <OutputDim> %d <FmapXLen> %d <FmapYLen> %d <PoolXLen> %d <PoolYLen> %d <PoolXStep> %d <PoolYStep> %d\n" % \
( o.pool1_type, pool1_input_dim, pool1_output_dim, pool1_fmap_x_len, pool1_fmap_y_len, o.pool1_x_len, o.pool1_y_len, o.pool1_x_step, o.pool1_y_step )
convolution_proto += "<Rescale> <InputDim> %d <OutputDim> %d <InitParam> %f\n" % \
( pool1_output_dim, pool1_output_dim, 1.0 )
convolution_proto += "<AddShift> <InputDim> %d <OutputDim> %d <InitParam> %f\n" % \
( pool1_output_dim, pool1_output_dim, 0.0 )
convolution_proto += "%s <InputDim> %d <OutputDim> %d\n" % \
( o.activation_type, pool1_output_dim, pool1_output_dim )
convolution_proto += "<Convolutional2DComponent> <InputDim> %d <OutputDim> %d <FmapXLen> %d <FmapYLen> %d <FiltXLen> %d <FiltYLen> %d <FiltXStep> %d <FiltYStep> %d <ConnectFmap> %d <BiasMean> %f <BiasRange> %f <ParamStddev> %f\n" % \
( cnn2_input_dim, cnn2_output_dim, cnn2_fmap_x_len, cnn2_fmap_y_len, o.cnn2_filt_x_len, o.cnn2_filt_y_len, o.cnn2_filt_x_step, o.cnn2_filt_y_step, o.cnn2_connect_fmap, -2.0, 4.0, 0.1 )
convolution_proto += "<Rescale> <InputDim> %d <OutputDim> %d <InitParam> %f\n" % \
( cnn2_output_dim, cnn2_output_dim, 1.0)
convolution_proto += "<AddShift> <InputDim> %d <OutputDim> %d <InitParam> %f\n" % \
( cnn2_output_dim, cnn2_output_dim, 0.0)
convolution_proto += "%s <InputDim> %d <OutputDim> %d\n" % \
( o.activation_type, cnn2_output_dim, cnn2_output_dim)
if (o.pitch_dim > 0):
  # convolutional part
  f_conv = open('%s/nnet.proto.convolution' % o.dirct, 'w')
  f_conv.write('<NnetProto>\n')
  f_conv.write(convolution_proto)
  f_conv.write('</NnetProto>\n')
  f_conv.close()
  # pitch part
  f_pitch = open('%s/nnet.proto.pitch' % o.dirct, 'w')
  f_pitch.write('<NnetProto>\n')
  f_pitch.write('<AffineTransform> <InputDim> %d <OutputDim> %d <BiasMean> %f <BiasRange> %f <ParamStddev> %f\n' % \
    ((o.pitch_dim * (o.delta_order+1) * (o.splice*2+1)), o.num_pitch_neurons, -2.0, 4.0, 0.109375))
  f_pitch.write('%s <InputDim> %d <OutputDim> %d\n' % \
    (o.activation_type, o.num_pitch_neurons, o.num_pitch_neurons))
  f_pitch.write('<AffineTransform> <InputDim> %d <OutputDim> %d <BiasMean> %f <BiasRange> %f <ParamStddev> %f\n' % \
    (o.num_pitch_neurons, o.num_pitch_neurons, -2.0, 4.0, 0.109375))
  f_pitch.write('%s <InputDim> %d <OutputDim> %d\n' % \
    (o.activation_type, o.num_pitch_neurons, o.num_pitch_neurons))
  f_pitch.write('</NnetProto>\n')
  f_pitch.close()
  # parallel part
  vector = ''
  for i in range(1, (feat_raw_dim + o.pitch_dim) * (o.delta_order+1) * (o.splice*2+1), feat_raw_dim + o.pitch_dim):
    vector += '%d:1:%d ' % (i, i + feat_raw_dim - 1)
  for i in range(feat_raw_dim+1, (feat_raw_dim + o.pitch_dim) * (o.delta_order+1) * (o.splice*2+1), feat_raw_dim + o.pitch_dim):
    vector += '%d:1:%d ' % (i, i + o.pitch_dim - 1)
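  # the <Copy> below reorders each spliced input so all fbank blocks come first
  # and all pitch blocks last; <ParallelComponent> then routes the fbank part to
  # the convolutional proto and the pitch part to the small pitch MLP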
  print '<Copy> <InputDim> %d <OutputDim> %d <BuildVector> %s </BuildVector> ' % \
    ((feat_raw_dim + o.pitch_dim) * (o.delta_order+1) * (o.splice*2+1), (feat_raw_dim + o.pitch_dim) * (o.delta_order+1) * (o.splice*2+1), vector)
  print '<ParallelComponent> <InputDim> %d <OutputDim> %d <NestedNnetProto> %s %s </NestedNnetProto>' % \
    ((feat_raw_dim + o.pitch_dim) * (o.delta_order+1) * (o.splice*2+1), o.num_pitch_neurons + cnn2_output_dim, '%s/nnet.proto.convolution' % o.dirct, '%s/nnet.proto.pitch' % o.dirct)
  num_convolution_output = o.num_pitch_neurons + cnn2_output_dim
else:  # no pitch
  print convolution_proto
# We are done!
sys.exit(0)
src/nnet/nnet-component-test.cc:
 // nnet/nnet-component-test.cc
-// Copyright 2014 Brno University of Technology (author: Karel Vesely),
+// Copyright 2014-2015 Brno University of Technology (author: Karel Vesely),
+//                     The Johns Hopkins University (author: Sri Harish Mallidi)
 // See ../../COPYING for clarification regarding multiple authors
@@ -187,153 +187,113 @@ namespace nnet1 {
   delete c;
 }
 
-/* TODO for Harish!
-void UnitTestMaxPooling2DComponent(){
-  std::string dim_str;
-  std::ifstream infile("/home/harish/kaldi_cnn_testfiles/avgpool1.txt");
-  std::getline(infile, dim_str);
-  std::stringstream stream(dim_str);
-  std::vector<int> dims;
-  int n;
-  while(stream >> n){
-    dims.push_back(n);
-  }
-  std::string comp_data_str, matrix_str;
-  std::getline(infile, comp_data_str);
-  std::getline(infile, matrix_str);
-  MaxPooling2DComponent* c = new MaxPooling2DComponent(dims[0], dims[1]);
-  std::istringstream is_comp_data(comp_data_str);
-  c->ReadData(is_comp_data, false);
-  std::istringstream is_matrix(matrix_str);
-  CuMatrix<BaseFloat> mat_in;
-  mat_in.Read(is_matrix, false);
-  CuMatrix<BaseFloat> mat_out;
-  c->Propagate(mat_in,&mat_out);
-  KALDI_LOG << "mat_out " << mat_out;
-  std::string mat_out_diff_str;
-  std::getline(infile, mat_out_diff_str);
-  std::istringstream is_mat_out_diff(mat_out_diff_str);
-  CuMatrix<BaseFloat> out_diff, in_diff;
-  out_diff.Read(is_mat_out_diff, false);
-  c->Backpropagate(mat_in, mat_out, out_diff, &in_diff);
-  KALDI_LOG << "out_diff" << out_diff;
-  KALDI_LOG << "in_diff " << in_diff;
-  delete c;
-}
-*/
+void UnitTestMaxPooling2DComponent() { /* Implemented by Harish Mallidi */
+  // make max-pooling2d component
+  Component* c = Component::Init("<MaxPooling2DComponent> <InputDim> 56 <OutputDim> 18 \
+<FmapXLen> 4 <FmapYLen> 7 <PoolXLen> 2 <PoolYLen> 3 <PoolXStep> 1 <PoolYStep> 2");
+
+  // input matrix,
+  CuMatrix<BaseFloat> mat_in;
+  ReadCuMatrixFromString("[ 0 0 1 1 2 2 3 3 4 4 5 5 6 6 7 7 8 8 9 9 10 10 11 11 12 12 13 13 14 14 15 15 16 16 17 17 18 18 19 19 20 20 21 21 22 22 23 23 24 24 25 25 26 26 27 27 ]", &mat_in);
+
+  // expected output (max values in the patch)
+  CuMatrix<BaseFloat> mat_out_ref;
+  ReadCuMatrixFromString("[ 9 9 11 11 13 13 16 16 18 18 20 20 23 23 25 25 27 27 ]", &mat_out_ref);
+
+  // propagate,
+  CuMatrix<BaseFloat> mat_out;
+  c->Propagate(mat_in, &mat_out);
+  KALDI_LOG << "mat_out" << mat_out << "mat_out_ref" << mat_out_ref;
+  AssertEqual(mat_out, mat_out_ref);
+
+  // locations of max values will be shown
+  CuMatrix<BaseFloat> mat_out_diff(mat_out);
+  ReadCuMatrixFromString("[ 0 0 1 1 2 2 3 3 4 4 5 5 6 6 7 7 8 8 ]", &mat_out_diff);
+
+  // expected backpropagated values,
+  CuMatrix<BaseFloat> mat_in_diff_ref;  // hand-computed back-propagated values,
+  ReadCuMatrixFromString("[ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0.25 0.25 0 0 1 1 0 0 0 0 0.75 0.75 0 0 1 1 0 0 2.5 2.5 0 0 0 0 3 3 0 0 3.5 3.5 0 0 8 8 ]", &mat_in_diff_ref);
+
+  // backpropagate,
+  CuMatrix<BaseFloat> mat_in_diff;
+  c->Backpropagate(mat_in, mat_out, mat_out_diff, &mat_in_diff);
+  KALDI_LOG << "mat_in_diff " << mat_in_diff << " mat_in_diff_ref " << mat_in_diff_ref;
+  AssertEqual(mat_in_diff, mat_in_diff_ref);
+
+  delete c;
+}
 
-/* TODO for Harish:
-void UnitTestAveragePooling2DComponent(){
-  std::string dim_str;
-  std::ifstream infile("/home/harish/kaldi_cnn_testfiles/avgpool1.txt");
-  std::getline(infile, dim_str);
-  std::stringstream stream(dim_str);
-  std::vector<int> dims;
-  int n;
-  while(stream >> n){
-    dims.push_back(n);
-  }
-  std::string comp_data_str, matrix_str;
-  std::getline(infile, comp_data_str);
-  std::getline(infile, matrix_str);
-  AveragePooling2DComponent* c = new AveragePooling2DComponent(dims[0], dims[1]);
-  std::istringstream is_comp_data(comp_data_str);
-  c->ReadData(is_comp_data, false);
-  std::istringstream is_matrix(matrix_str);
-  CuMatrix<BaseFloat> mat_in;
-  mat_in.Read(is_matrix, false);
-  CuMatrix<BaseFloat> mat_out;
-  c->Propagate(mat_in,&mat_out);
-  KALDI_LOG << "mat_out " << mat_out;
-  std::string mat_out_diff_str;
-  std::getline(infile, mat_out_diff_str);
-  std::istringstream is_mat_out_diff(mat_out_diff_str);
-  CuMatrix<BaseFloat> out_diff, in_diff;
-  out_diff.Read(is_mat_out_diff, false);
-  c->Backpropagate(mat_in, mat_out, out_diff, &in_diff);
-  KALDI_LOG << "out_diff" << out_diff;
-  KALDI_LOG << "in_diff " << in_diff;
-  delete c;
-}
-*/
+void UnitTestAveragePooling2DComponent() { /* Implemented by Harish Mallidi */
+  // make average-pooling2d component
+  Component* c = Component::Init("<AveragePooling2DComponent> <InputDim> 56 <OutputDim> 18 \
+<FmapXLen> 4 <FmapYLen> 7 <PoolXLen> 2 <PoolYLen> 3 <PoolXStep> 1 <PoolYStep> 2");
+
+  // input matrix,
+  CuMatrix<BaseFloat> mat_in;
+  ReadCuMatrixFromString("[ 0 0 1 1 2 2 3 3 4 4 5 5 6 6 7 7 8 8 9 9 10 10 11 11 12 12 13 13 14 14 15 15 16 16 17 17 18 18 19 19 20 20 21 21 22 22 23 23 24 24 25 25 26 26 27 27 ]", &mat_in);
+
+  // expected output (average values in the patch)
+  CuMatrix<BaseFloat> mat_out_ref;
+  ReadCuMatrixFromString("[ 4.5 4.5 6.5 6.5 8.5 8.5 11.5 11.5 13.5 13.5 15.5 15.5 18.5 18.5 20.5 20.5 22.5 22.5 ]", &mat_out_ref);
+
+  // propagate,
+  CuMatrix<BaseFloat> mat_out;
+  c->Propagate(mat_in, &mat_out);
+  KALDI_LOG << "mat_out" << mat_out << "mat_out_ref" << mat_out_ref;
+  AssertEqual(mat_out, mat_out_ref);
+
+  // output gradients,
+  CuMatrix<BaseFloat> mat_out_diff(mat_out);
+  ReadCuMatrixFromString("[ 0 0 1 1 2 2 3 3 4 4 5 5 6 6 7 7 8 8 ]", &mat_out_diff);
+
+  // expected backpropagated values,
+  CuMatrix<BaseFloat> mat_in_diff_ref;  // hand-computed back-propagated values,
+  ReadCuMatrixFromString("[ 0 0 0 0 0.0833333 0.0833333 0.166667 0.166667 0.25 0.25 0.333333 0.333333 0.333333 0.333333 0.25 0.25 0.25 0.25 0.333333 0.333333 0.416667 0.416667 0.5 0.5 0.583333 0.583333 0.583333 0.583333 0.75 0.75 0.75 0.75 0.833333 0.833333 0.916667 0.916667 1 1 1.08333 1.08333 1.08333 1.08333 1 1 1 1 1.08333 1.08333 1.16667 1.16667 1.25 1.25 1.33333 1.33333 1.33333 1.33333 ]", &mat_in_diff_ref);
+
+  // backpropagate,
+  CuMatrix<BaseFloat> mat_in_diff;
+  c->Backpropagate(mat_in, mat_out, mat_out_diff, &mat_in_diff);
+  KALDI_LOG << "mat_in_diff " << mat_in_diff << " mat_in_diff_ref " << mat_in_diff_ref;
+  AssertEqual(mat_in_diff, mat_in_diff_ref);
+
+  delete c;
+}
 
-/* TODO for Harish:
-void UnitTestConvolutional2DComponent() {
-  std::string dim_str;
-  std::ifstream infile("/home/harish/kaldi_cnn_testfiles/filt6.txt");
-  std::getline(infile, dim_str);
-  std::stringstream stream(dim_str);
-  std::vector<int> dims;
-  int n;
-  while(stream >> n){
-    dims.push_back(n);
-  }
-  std::string comp_data_str, matrix_str;
-  std::getline(infile, comp_data_str);
-  std::getline(infile, matrix_str);
-  Convolutional2DComponent* c = new Convolutional2DComponent(dims[0], dims[1]);
-  std::istringstream is_comp_data(comp_data_str);
-  c->ReadData(is_comp_data, false);