Commit 18085e41 authored by Dan Povey's avatar Dan Povey
Browse files

(Joint work with Vijay Peddinti), add recipe that supports non-contiguous...

(Joint work with Vijay Peddinti), add recipe that supports non-contiguous splicing in the middle of the network.  This involved some refactoring of the core nnet2 code, to make the update efficient.  Need to add results and more example scripts... but it's about 1% abs better than previous online-nnet2 recipe.

git-svn-id: https://svn.code.sf.net/p/kaldi/code/trunk@4581 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
parent 62a04e36
# config for high-resolution MFCC features, intended for neural network training.
# Note: we keep all cepstra, so it has the same info as filterbank features,
# but MFCC is more easily compressible (because less correlated) which is why
# we prefer this method.
--use-energy=false # use average of log energy, not energy.
--sample-frequency=8000 # Switchboard is sampled at 8kHz
--num-mel-bins=40 # similar to Google's setup.
--num-ceps=40 # there is no dimensionality reduction.
--low-freq=40 # low cutoff frequency for mel bins
--high-freq=-200 # high cutoff frequency, relative to Nyquist of 4000 (=3800)
#!/bin/bash

# Multi-splice online-nnet2 training recipe (Switchboard).  Trains a pnorm
# network whose hidden layers splice non-contiguous frame offsets, on top of
# high-resolution MFCC features plus online iVectors.

set -e

stage=1
train_stage=-10
use_gpu=true
nnet2_online=nnet2_online_ms   # name of the experiment directory under exp/
# Splice specification: space-separated "layer<N>/<colon-separated offsets>"
# pairs, parsed by the multi-splice training script.
splice_inds="layer0/-4:-3:-2:-1:0:1:2:3:4 layer2/-3:3"
common_egs_dir=                # optionally reuse egs from a previous run

. cmd.sh   # defines $train_cmd and $decode_cmd (was redundantly sourced twice)
. ./path.sh
. ./utils/parse_options.sh
# Choose queue options, thread count and minibatch size depending on whether
# we train on GPU (single thread per job, large minibatch) or CPU
# (multi-threaded, smaller minibatch).
if $use_gpu; then
if ! cuda-compiled; then
cat <<EOF && exit 1
This script is intended to be used with GPUs but you have not compiled Kaldi with CUDA
If you want to use GPUs (and have them), go to src/, and configure and make on a machine
where "nvcc" is installed. Otherwise, call this script with --use-gpu false
EOF
fi
parallel_opts="-l gpu=1"
num_threads=1
minibatch_size=512
# the _a is in case I want to change the parameters.
else
# Use 4 nnet jobs just like run_4d_gpu.sh so the results should be
# almost the same, but this may be a little bit slow.
num_threads=16
minibatch_size=128
parallel_opts="-pe smp $num_threads"
fi
# All training and decoding output for this recipe goes under this directory.
dir=exp/$nnet2_online/nnet_a
# Stage 1: extract high-resolution (40-dim) MFCC features for train and eval
# sets, and create the data subsets used by later stages.
if [ $stage -le 1 ]; then
mfccdir=mfcc_hires
# On the JHU CLSP grid, spread the MFCC storage over several file systems.
if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $mfccdir/storage ]; then
date=$(date +'%m_%d_%H_%M')
utils/create_split_dir.pl /export/b0{1,2,3,4}/$USER/kaldi-data/egs/swbd-$date/s5b/$mfccdir/storage $mfccdir/storage
fi
# Hires MFCCs + CMVN stats for the training data.
utils/copy_data_dir.sh data/train data/train_hires
steps/make_mfcc.sh --nj 70 --mfcc-config conf/mfcc_hires.conf \
--cmd "$train_cmd" data/train_hires exp/make_hires/train $mfccdir;
steps/compute_cmvn_stats.sh data/train_hires exp/make_hires/train $mfccdir;
# Remove the small number of utterances that couldn't be extracted for some
# reason (e.g. too short; no such file).
utils/fix_data_dir.sh data/train_hires;
# Create MFCCs for the eval set
utils/copy_data_dir.sh data/eval2000 data/eval2000_hires
steps/make_mfcc.sh --cmd "$train_cmd" --nj 10 --mfcc-config conf/mfcc_hires.conf \
data/eval2000_hires exp/make_hires/eval2000 $mfccdir;
steps/compute_cmvn_stats.sh data/eval2000_hires exp/make_hires/eval2000 $mfccdir;
utils/fix_data_dir.sh data/eval2000_hires # remove segments with problems
# Use the first 4k sentences as dev set. Note: when we trained the LM, we used
# the 1st 10k sentences as dev set, so the 1st 4k won't have been used in the
# LM training data. However, they will be in the lexicon, plus speakers
# may overlap, so it's still not quite equivalent to a test set.
utils/subset_data_dir.sh --first data/train_hires 4000 data/train_hires_dev ;# 5hr 6min
n=$[`cat data/train/segments | wc -l` - 4000]
utils/subset_data_dir.sh --last data/train_hires $n data/train_hires_nodev ;
# create a 100k subset for the lda+mllt training
utils/subset_data_dir.sh --first data/train_hires_nodev 100000 data/train_hires_100k;
# De-duplicate repeated utterance texts (max 200 / 300 repetitions).
local/remove_dup_utts.sh 200 data/train_hires_100k data/train_hires_100k_nodup;
local/remove_dup_utts.sh 300 data/train_hires_nodev data/train_hires_nodup # 286hr
fi
# Stage 2: train a small LDA+MLLT system; only its transform is needed, as the
# input to diag-UBM / iVector-extractor training.
if [ $stage -le 2 ]; then
mkdir -p exp/$nnet2_online
# We need to build a small system just because we need the LDA+MLLT transform
# to train the diag-UBM on top of. We use --num-iters 13 because after we get
# the transform (12th iter is the last), any further training is pointless.
# this decision is based on fisher_english
steps/train_lda_mllt.sh --cmd "$train_cmd" --num-iters 13 \
--splice-opts "--left-context=3 --right-context=3" \
5500 90000 data/train_hires_100k_nodup data/lang exp/tri2_ali_100k_nodup exp/$nnet2_online/tri3b
fi
# Stage 3: train the diagonal UBM used to initialize the iVector extractor.
if [ $stage -le 3 ]; then
# To train a diagonal UBM we don't need very much data, so use the smallest subset.
# NOTE(review): data/train_hires_30k_nodup is not created by stage 1 above
# (which only makes 100k subsets) -- confirm it is produced elsewhere before
# relying on this stage.
steps/online/nnet2/train_diag_ubm.sh --cmd "$train_cmd" --nj 30 --num-frames 200000 \
data/train_hires_30k_nodup 512 exp/$nnet2_online/tri3b exp/$nnet2_online/diag_ubm
fi
# Stage 4: train the online iVector extractor.
if [ $stage -le 4 ]; then
# iVector extractors can be sensitive to the amount of data, but this one has a
# fairly small dim (defaults to 100) so we don't use all of it, we use just the
# 100k subset (just under half the data).
steps/online/nnet2/train_ivector_extractor.sh --cmd "$train_cmd" --nj 10 \
data/train_hires_100k_nodup exp/$nnet2_online/diag_ubm exp/$nnet2_online/extractor || exit 1;
fi
# Stage 5: extract online iVectors for the training data.
if [ $stage -le 5 ]; then
# We extract iVectors on all the train_nodup data, which will be what we
# train the system on.
# having a larger number of speakers is helpful for generalization, and to
# handle per-utterance decoding well (iVector starts at zero).
# "--utts-per-spk-max 2" splits speakers into pseudo-speakers of at most two
# utterances each, increasing speaker diversity seen in training.
steps/online/nnet2/copy_data_dir.sh --utts-per-spk-max 2 data/train_hires_nodup data/train_hires_nodup_max2
steps/online/nnet2/extract_ivectors_online.sh --cmd "$train_cmd" --nj 30 \
data/train_hires_nodup_max2 exp/$nnet2_online/extractor exp/$nnet2_online/ivectors_train_nodup2 || exit 1;
fi
# Stage 6: train the multi-splice pnorm network; $splice_inds (set at the top)
# controls which frame offsets are spliced at which hidden layer.
if [ $stage -le 6 ]; then
# Because we have a lot of data here and we don't want the training to take
# too long so we reduce the number of epochs from the defaults (15 + 5) to (5
# + 2), and the (initial,final) learning rate from the defaults (0.04, 0.004)
# to (0.01, 0.001).
# decided to let others run their jobs too (we only have 10 GPUs on our queue
# at JHU). The number of parameters is smaller than the baseline system we had in
# mind (../nnet2/run_5d_gpu.sh, which had pnorm input/output dim 3000/300 and
# 5 hidden layers, versus our 3000/300 and 5 hidden layers, even though we're
# training on more data than the baseline system. The motivation here is that we
# want to demonstrate the capability of doing real-time decoding, and if the
# network was too big we wouldn't be able to decode in real-time using a CPU.
steps/nnet2/train_pnorm_multisplice.sh --stage $train_stage \
--splice-indexes "$splice_inds" \
--feat-type raw \
--online-ivector-dir exp/$nnet2_online/ivectors_train_nodup2 \
--cmvn-opts "--norm-means=false --norm-vars=false" \
--num-threads "$num_threads" \
--minibatch-size "$minibatch_size" \
--parallel-opts "$parallel_opts" \
--num-jobs-nnet 6 \
--num-epochs 5 \
--add-layers-period 1 \
--num-hidden-layers 4 \
--mix-up 4000 \
--initial-learning-rate 0.01 --final-learning-rate 0.001 \
--cmd "$decode_cmd" \
--egs-dir "$common_egs_dir" \
--pnorm-input-dim 3000 \
--pnorm-output-dim 300 \
data/train_hires_nodup data/lang exp/tri4b_ali_nodup $dir || exit 1;
fi
# Stage 7: extract online iVectors for the test/dev sets, for offline decoding.
if [ $stage -le 7 ]; then
for data in eval2000_hires train_hires_dev; do
steps/online/nnet2/extract_ivectors_online.sh --cmd "$train_cmd" --nj 20 \
data/${data} exp/$nnet2_online/extractor exp/$nnet2_online/ivectors_${data} || exit 1;
done
fi
# Stage 8: offline decoding with the pre-extracted iVectors.
if [ $stage -le 8 ]; then
# this does offline decoding that should give the same results as the real
# online decoding (the one with --per-utt true)
for lm_suffix in tg fsh_tgpr; do
graph_dir=exp/tri4b/graph_sw1_${lm_suffix}
# use already-built graphs.
for data in eval2000_hires train_hires_dev; do
steps/nnet2/decode.sh --nj 30 --cmd "$decode_cmd" --config conf/decode.config \
--online-ivector-dir exp/$nnet2_online/ivectors_${data} \
$graph_dir data/${data} $dir/decode_${data}_sw1_${lm_suffix} || exit 1;
done
done
fi
# Stage 9: prepare the model directory for real online decoding.
if [ $stage -le 9 ]; then
# If this setup used PLP features, we'd have to give the option --feature-type plp
# to the script below.
steps/online/nnet2/prepare_online_decoding.sh --mfcc-config conf/mfcc_hires.conf \
data/lang exp/$nnet2_online/extractor "$dir" ${dir}_online || exit 1;
fi
# Stage 10: online decoding, carrying the iVector state across utterances of
# the same speaker.
if [ $stage -le 10 ]; then
# do the actual online decoding with iVectors, carrying info forward from
# previous utterances of the same speaker.
for lm_suffix in tg fsh_tgpr; do
graph_dir=exp/tri4b/graph_sw1_${lm_suffix}
for data in eval2000_hires train_hires_dev; do
steps/online/nnet2/decode.sh --config conf/decode.config --cmd "$decode_cmd" --nj 30 \
"$graph_dir" data/${data} ${dir}_online/decode_${data}_sw1_${lm_suffix} || exit 1;
done
done
fi
# Stage 11: online decoding with each utterance treated independently
# (iVector reset per utterance).
if [ $stage -le 11 ]; then
# this version of the decoding treats each utterance separately
# without carrying forward speaker information.
for lm_suffix in tg fsh_tgpr; do
graph_dir=exp/tri4b/graph_sw1_${lm_suffix}
for data in eval2000_hires train_hires_dev; do
steps/online/nnet2/decode.sh --config conf/decode.config --cmd "$decode_cmd" --nj 30 \
--per-utt true \
"$graph_dir" data/${data} ${dir}_online/decode_${data}_sw1_${lm_suffix}_per_utt || exit 1;
done
done
fi
exit 0;

# Example commands for summarizing results (not executed; the script exits above).
# get results on Dev with this command (dirs are named decode_train_hires_dev_*,
# matching the decode directories created in stages 8/10/11):
for x in exp/$nnet2_online/nnet_a/decode_train_hires_dev_sw1_*; do grep WER $x/wer_* | utils/best_wer.sh; done
# and results on eval2000 with this command:
for x in exp/$nnet2_online/nnet_a/decode_eval2000_*; do grep Sum $x/score_*/*sys | utils/best_wer.sh; done
......@@ -21,6 +21,8 @@ num_jobs_nnet=16 # Number of neural net jobs to run in parallel
stage=0
io_opts="-tc 5" # for jobs with a lot of I/O, limits the number running at one time.
splice_width=4 # meaning +- 4 frames on each side for second LDA
left_context=
right_context=
random_copy=false
online_ivector_dir=
ivector_randomize_prob=0.0 # if >0.0, randomizes iVectors during training with
......@@ -49,6 +51,8 @@ if [ $# != 4 ]; then
echo " --feat-type <lda|raw> # (by default it tries to guess). The feature type you want"
echo " # to use as input to the neural net."
echo " --splice-width <width;4> # Number of frames on each side to append for feature input"
echo " --left-context <width;4> # Number of frames on left side to append for feature input, overrides splice-width"
echo " --right-context <width;4> # Number of frames on right side to append for feature input, overrides splice-width"
echo " # (note: we splice processed, typically 40-dimensional frames"
echo " --num-frames-diagnostic <#frames;4000> # Number of frames used in computing (train,valid) diagnostics"
echo " --num-valid-frames-combine <#frames;10000> # Number of frames used in getting combination weights at the"
......@@ -64,6 +68,8 @@ lang=$2 # kept for historical reasons, but never used.
alidir=$3
dir=$4
# Default the left/right context to the symmetric splice width when not given.
# Note: the variable must be expanded with "$"; without it the literal string
# "splice_width" (not its value) was assigned, breaking --left-context=... below.
[ -z "$left_context" ] && left_context=$splice_width
[ -z "$right_context" ] && right_context=$splice_width
# Check some files.
......@@ -180,7 +186,7 @@ done
remove () { for x in $*; do [ -L $x ] && rm $(readlink -f $x); rm $x; done }
nnet_context_opts="--left-context=$splice_width --right-context=$splice_width"
nnet_context_opts="--left-context=$left_context --right-context=$right_context"
mkdir -p $dir/egs
if [ $stage -le 2 ]; then
......
......@@ -11,6 +11,8 @@ cmd=run.pl
feat_type=
stage=0
splice_width=4 # meaning +- 4 frames on each side for second LDA
left_context= # left context for second LDA
right_context= # right context for second LDA
rand_prune=4.0 # Relates to a speedup we do for LDA.
within_class_factor=0.0001 # This affects the scaling of the transform rows...
# sorry for no explanation, you'll have to see the code.
......@@ -41,6 +43,8 @@ if [ $# != 4 ]; then
echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
echo " --splice-width <width|4> # Number of frames on each side to append for feature input"
echo " # (note: we splice processed, typically 40-dimensional frames"
echo " --left-context <width;4> # Number of frames on left side to append for feature input, overrides splice-width"
echo " --right-context <width;4> # Number of frames on right side to append for feature input, overrides splice-width"
echo " --stage <stage|0> # Used to run a partially-completed training process from somewhere in"
echo " # the middle."
echo " --online-vector-dir <dir|none> # Directory produced by"
......@@ -53,6 +57,8 @@ lang=$2
alidir=$3
dir=$4
# Default the left/right context to the symmetric splice width when not given.
# Note: the variable must be expanded with "$"; without it the literal string
# "splice_width" (not its value) was assigned, breaking splice-feats options below.
[ -z "$left_context" ] && left_context=$splice_width
[ -z "$right_context" ] && right_context=$splice_width
[ ! -z "$online_ivector_dir" ] && \
extra_files="$online_ivector_dir/ivector_online.scp $online_ivector_dir/ivector_period"
......@@ -128,7 +134,7 @@ feats_one="$(echo "$feats" | sed s:JOB:1:g)"
feat_dim=$(feat-to-dim "$feats_one" -) || exit 1;
# by default: no dim reduction.
spliced_feats="$feats splice-feats --left-context=$splice_width --right-context=$splice_width ark:- ark:- |"
spliced_feats="$feats splice-feats --left-context=$left_context --right-context=$right_context ark:- ark:- |"
if [ ! -z "$online_ivector_dir" ]; then
ivector_period=$(cat $online_ivector_dir/ivector_period) || exit 1;
......
# Copyright 2014 Johns Hopkins University (Authors: Daniel Povey and Vijayaditya Peddinti). Apache 2.0.
# Creates the nnet.config and hidden_*.config scripts used in train_pnorm_multisplice.sh
# Parses the splice string to generate relevant variables for get_egs.sh, get_lda.sh and nnet/hidden.config files
import re, argparse, sys, math
# returns the set of frame indices required to perform the convolution
# between sequences with frame indices in x and y
def get_convolution_index_set(x, y):
    """Return the sorted set of frame indices needed to convolve two index
    sequences, i.e. all distinct pairwise sums of an element of x and an
    element of y.  (Replaces the Python-2-only xrange loop; works on both
    Python 2 and 3.)"""
    return sorted(set(a + b for a in x for b in y))
def parse_splice_string(splice_string):
    """Parse a splice specification like "layer0/-4:-3:...:4 layer2/-3:3".

    Returns a two-element list:
      [0] dict mapping layer index -> colon-separated context string;
      [1] a shell-sourceable string defining nnet_left_context,
          nnet_right_context, first_left_context and first_right_context
          (all as absolute values).

    Raises Exception on malformed input, on a non-contiguous layer-0 context,
    or on a layer-0 context that does not straddle frame 0.
    """
    layerwise_splice_indexes = splice_string.split('layer')[1:]
    print(splice_string.split('layer'))
    contexts = {}
    first_right_context = 0  # default value
    first_left_context = 0   # default value
    # Frame indexes required by the network at the input layer; built up by
    # convolving each layer's context with the context accumulated so far
    # (used to determine the context for get_egs.sh).
    nnet_frame_indexes = [0]
    try:
        for cur_splice_indexes in layerwise_splice_indexes:
            layer_index, frame_indexes = cur_splice_indexes.split("/")
            frame_indexes = [int(s) for s in frame_indexes.split(':')]
            layer_index = int(layer_index)
            assert layer_index >= 0
            if layer_index == 0:
                first_left_context = min(frame_indexes)
                first_right_context = max(frame_indexes)
                # Explicit checks instead of assert so they survive python -O.
                if frame_indexes != list(range(first_left_context, first_right_context + 1)):
                    raise Exception('Currently the first splice component just accepts contiguous context.')
                if not (first_left_context <= 0 and first_right_context >= 0):
                    raise Exception("""get_lda.sh script does not support postive left-context or negative right context.
left context provided is %d and right context provided is %d.""" % (first_left_context, first_right_context))
            # convolve the current splice indices with the splice indices until last layer
            nnet_frame_indexes = get_convolution_index_set(frame_indexes, nnet_frame_indexes)
            contexts[layer_index] = ":".join(str(i) for i in frame_indexes)
    except ValueError:
        # Fixed: previously referenced the global 'params', which raises a
        # NameError when this function is used outside the __main__ block.
        raise Exception('Unknown format in splice_indexes variable: {0}'.format(splice_string))
    print(nnet_frame_indexes)
    max_left_context = min(nnet_frame_indexes)
    max_right_context = max(nnet_frame_indexes)
    return [contexts, ' nnet_left_context={0};\n nnet_right_context={1}\n first_left_context={2};\n first_right_context={3}\n'.format(abs(max_left_context), abs(max_right_context), abs(first_left_context), abs(first_right_context))]
def create_config_files(output_dir, params):
    """Write the network config files used by train_pnorm_multisplice.sh.

    Creates in output_dir:
      vars             -- shell-sourceable context variables (written first,
                          even if a later check fails, matching the original
                          behavior);
      nnet.config      -- the initial network (splice / LDA / affine / pnorm /
                          normalize / affine / softmax);
      hidden_<i>.config for i in 1..num_hidden_layers-1 -- one hidden layer
                          each, with a SpliceComponent prepended when a context
                          was specified for that layer.

    Raises Exception if the splice string names a layer >= num_hidden_layers,
    or if no context is given for layer 0.
    """
    pnorm_input_dim = params.pnorm_input_dim
    pnorm_output_dim = params.pnorm_output_dim
    contexts, context_variables = parse_splice_string(params.splice_indexes)
    with open("{0}/vars".format(output_dir), "w") as var_file:
        var_file.write(context_variables)
    # Explicit check (instead of assert) so it also fires under python -O.
    if max(contexts.keys()) >= params.num_hidden_layers:
        raise Exception("""Splice string provided is {2}.
Number of hidden layers {0}, is less than the number of context specifications provided.
Splicing is supported only until layer {1}.""".format(params.num_hidden_layers, params.num_hidden_layers - 1, params.splice_indexes))
    stddev = 1.0 / math.sqrt(pnorm_input_dim)
    if 0 not in contexts:
        raise Exception('A splice layer is expected to be the first layer. Provide a context for the first layer.')
    nnet_config = ["SpliceComponent input-dim={0} context={1} const-component-dim={2}".format(params.total_input_dim, contexts[0], params.ivector_dim),
                   "FixedAffineComponent matrix={0}".format(params.lda_mat),
                   "AffineComponentPreconditionedOnline input-dim={0} output-dim={1} {2} learning-rate={3} param-stddev={4} bias-stddev={5}".format(params.lda_dim, pnorm_input_dim, params.online_preconditioning_opts, params.initial_learning_rate, stddev, params.bias_stddev),
                   "PnormComponent input-dim={0} output-dim={1} p={2}".format(pnorm_input_dim, pnorm_output_dim, params.pnorm_p),
                   "NormalizeComponent dim={0}".format(pnorm_output_dim),
                   "AffineComponentPreconditionedOnline input-dim={0} output-dim={1} {2} learning-rate={3} param-stddev=0 bias-stddev=0".format(pnorm_output_dim, params.num_targets, params.online_preconditioning_opts, params.initial_learning_rate),
                   "SoftmaxComponent dim={0}".format(params.num_targets)]
    with open("{0}/nnet.config".format(output_dir), "w") as nnet_config_file:
        nnet_config_file.write("\n".join(nnet_config))
    # Just run till num_hidden_layers-1 since we do not add splice before the
    # final affine transform.
    for i in range(1, params.num_hidden_layers):
        lines = []
        context_len = 1
        if i in contexts:
            # Adding the splice component as a context is provided
            lines.append("SpliceComponent input-dim=%d context=%s " % (pnorm_output_dim, contexts[i]))
            context_len = len(contexts[i].split(":"))
        # Add the hidden layer, which is a composition of an affine component, pnorm component and normalization component
        lines.append("AffineComponentPreconditionedOnline input-dim=%d output-dim=%d %s learning-rate=%f param-stddev=%f bias-stddev=%f"
                     % (pnorm_output_dim * context_len, pnorm_input_dim, params.online_preconditioning_opts, params.initial_learning_rate, stddev, params.bias_stddev))
        lines.append("PnormComponent input-dim=%d output-dim=%d p=%d" % (pnorm_input_dim, pnorm_output_dim, params.pnorm_p))
        lines.append("NormalizeComponent dim={0}".format(pnorm_output_dim))
        with open("{0}/hidden_{1}.config".format(output_dir, i), 'w') as out_file:
            out_file.write("\n".join(lines))
if __name__ == "__main__":
    # Echo the command line, for logging in the calling shell script.
    print(" ".join(sys.argv))
    parser = argparse.ArgumentParser()
    parser.add_argument('--splice-indexes', type=str, help='string specifying the indexes for the splice layers throughout the network')
    parser.add_argument('--total-input-dim', type=int, help='dimension of the input to the network')
    parser.add_argument('--ivector-dim', type=int, help='dimension of the ivector portion of the neural network input')
    parser.add_argument('--lda-mat', type=str, help='lda-matrix used after the first splice component')
    parser.add_argument('--lda-dim', type=str, help='dimension of the lda output')
    parser.add_argument('--pnorm-input-dim', type=int, help='dimension of input to pnorm layer')
    parser.add_argument('--pnorm-output-dim', type=int, help='dimension of output of pnorm layer')
    parser.add_argument('--pnorm-p', type=int, help='type of norm in the p-norm component')
    parser.add_argument('--online-preconditioning-opts', type=str, help='extra options for the AffineComponentPreconditionedOnline component')
    parser.add_argument('--initial-learning-rate', type=float, help='')
    parser.add_argument('--num-targets', type=int, help='#targets for the neural network ')
    parser.add_argument('--num-hidden-layers', type=int, help='#hidden layers in the neural network ')
    parser.add_argument('--bias-stddev', type=float, help='standard deviation of r.v. used for bias component initialization')
    parser.add_argument("mode", type=str, help="contexts|configs")
    parser.add_argument("output_dir", type=str, help="output directory to store the files")
    params = parser.parse_args()
    print(params)
    if params.mode == "contexts":
        # Only compute the context variables and write them to <output_dir>/vars.
        context, context_variables = parse_splice_string(params.splice_indexes)
        with open("{0}/vars".format(params.output_dir), "w") as var_file:
            var_file.write(context_variables)
    elif params.mode == "configs":
        # Write vars plus all nnet/hidden config files.
        create_config_files(params.output_dir, params)
    else:
        raise Exception("mode has to be in the set {contexts, configs}")
......@@ -47,7 +47,13 @@ void UnitTestGenericComponentInternal(const Component &component) {
CuMatrix<BaseFloat> input(num_egs, input_dim),
output(num_egs, output_dim);
input.SetRandn();
int32 num_chunks = 1,
first_offset = 0,
last_offset = num_egs-1;
ChunkInfo in_info(input_dim, num_chunks, first_offset, last_offset);
ChunkInfo out_info(output_dim, num_chunks, first_offset, last_offset);
int32 rand_seed = Rand();
RandomComponent *rand_component =
......@@ -56,7 +62,7 @@ void UnitTestGenericComponentInternal(const Component &component) {
srand(rand_seed);
rand_component->ResetGenerator();
}
component.Propagate(input, 1, &output);
component.Propagate(in_info, out_info, input, &output);
{
bool binary = (Rand() % 2 == 0);
Output ko("tmpf", binary);
......@@ -88,11 +94,10 @@ void UnitTestGenericComponentInternal(const Component &component) {
(component_copy->BackpropNeedsInput() ? input : empty_mat),
&output_ref =
(component_copy->BackpropNeedsOutput() ? output : empty_mat);
int32 num_chunks = 1;
component_copy->Backprop(input_ref, output_ref,
output_deriv, num_chunks, NULL, &input_deriv);
component_copy->Backprop(in_info, out_info, input_ref, output_ref,
output_deriv, NULL, &input_deriv);
int32 num_ok = 0, num_bad = 0, num_tries = 10;
KALDI_LOG << "Comparing feature gradients " << num_tries << " times.";
......@@ -122,7 +127,7 @@ void UnitTestGenericComponentInternal(const Component &component) {
rand_component->ResetGenerator();
}
}
component.Propagate(perturbed_input, 1, &perturbed_output);
component.Propagate(in_info, out_info, perturbed_input, &perturbed_output);
CuVector<BaseFloat> perturbed_output_objfs(num_egs);
perturbed_output_objfs.AddMatVec(1.0, perturbed_output, kNoTrans,
objf_vec, 0.0);
......@@ -174,10 +179,8 @@ void UnitTestGenericComponentInternal(const Component &component) {
output_deriv.Row(i).CopyFromVec(objf_vec);
CuMatrix<BaseFloat> input_deriv; // (input.NumRows(), input.NumCols());
int32 num_chunks = 1;
// This will compute the parameter gradient.
ucomponent->Backprop(input, output, output_deriv, num_chunks,
ucomponent->Backprop(in_info, out_info, input, output, output_deriv,
gradient_ucomponent, &input_deriv);
// Now compute the perturbed objf.
......@@ -192,7 +195,7 @@ void UnitTestGenericComponentInternal(const Component &component) {
rand_component->ResetGenerator();
}
}
perturbed_ucomponent->Propagate(input, 1, &output_perturbed);
perturbed_ucomponent->Propagate(in_info, out_info, input, &output_perturbed);
CuVector<BaseFloat> output_objfs_perturbed(num_egs);
output_objfs_perturbed.AddMatVec(1.0, output_perturbed,
kNoTrans, objf_vec, 0.0);
......@@ -679,20 +682,30 @@ void UnitTestParsing() {
}
void BasicDebugTestForSplice(bool output=false) {
int32 C=5;
int32 K=4, contextLen=1;
int32 R=3+2 * contextLen;
int32 C = 5,
K = 4,
context_len = 1,
R = 3 + 2 * context_len;
SpliceComponent *c = new SpliceComponent();
c->Init(C, contextLen, contextLen, K);
std::vector<int32> context(2 * context_len + 1);
for (int32 i = -1 * context_len; i <= context_len; i++)
context[i + context_len] = i;
c->Init(C, context, K);
ChunkInfo in_info = ChunkInfo(C, 1, 0, R - 1),
out_info = ChunkInfo((C - K) * context.size() + K, 1, context_len, R - 1 - context_len);
CuMatrix<BaseFloat> in(R, C), in_deriv(R, C);
CuMatrix<BaseFloat> out(R, c->OutputDim());
CuMatrix<BaseFloat> out(R - 2 * context_len, c->OutputDim());
in_info.Check();
out_info.Check();
in.SetRandn();
if (output)
KALDI_LOG << in;
c->Propagate(in, 1, &out);
c->Propagate(in_info, out_info, in, &out);
if (output)
KALDI_LOG << out;
......@@ -707,8 +720,7 @@ void BasicDebugTestForSplice(bool output=false) {
if (output)
KALDI_LOG << out;
int32 num_chunks = 1;
c->Backprop(in, in, out, num_chunks, c, &in_deriv);
c->Backprop(in_info, out_info, in, in, out, c, &in_deriv);
if (output)
KALDI_LOG << in_deriv;
......@@ -716,20 +728,25 @@ void BasicDebugTestForSplice(bool output=false) {
}
void BasicDebugTestForSpliceMax(bool output=false) {
int32 C=5;
int32 contextLen=2;
int32 R= 3 + 2*contextLen;
int32 C=5,
context_len=2,
R= 3 + 2*context_len;
SpliceMaxComponent *c = new SpliceMaxComponent();
c->Init(C, contextLen, contextLen);
std::vector<int32> context(2 * context_len + 1);
for (int32 i = -1 * context_len; i <= context_len; i++)
context[i + context_len] = i;
c->Init(C, context);
CuMatrix<BaseFloat> in(R, C), in_deriv(R, C);
CuMatrix<BaseFloat> out(R, c->OutputDim());
ChunkInfo in_info = ChunkInfo(C, 1, 0, R - 1),
out_info = ChunkInfo(C, 1, context_len, R - 1 - context_len);
in.SetRandn();
if (output)
KALDI_LOG << in;
c->Propagate(in, 1, &out);
c->Propagate(in_info, out_info, in, &out);
if (output)
KALDI_LOG << out;
......@@ -739,8 +756,7 @@ void BasicDebugTestForSpliceMax(bool output=false) {
if (output)
KALDI_LOG << out;
int32 num_chunks = 1;
c->Backprop(in, in, out, num_chunks, c, &in_deriv);
c->Backprop(in_info, out_info, in, in, out, c, &in_deriv);
if (output)
KALDI_LOG << in_deriv;
......
This diff is collapsed.
This diff is collapsed.
......@@ -87,7 +87,7 @@ class NnetDiscriminativeUpdater {
// another Nnet, in gradient-computation case, or