Commit d5ab6a10 authored by Karel Vesely

trunk,nnet1: adding a check that CUDA is compiled in to the NN training scripts,

- utils/nnet/make_nnet_proto.py : adding comments



git-svn-id: https://svn.code.sf.net/p/kaldi/code/trunk@4565 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
parent 860e1c7e
@@ -98,6 +98,9 @@ printf "\t Train-set : $data \n"
[ -e $dir/${nn_depth}.dbn ] && echo "$0 Skipping, already have $dir/${nn_depth}.dbn" && exit 0
# check if CUDA is compiled in,
cuda-compiled || { echo 'CUDA was not compiled in, skipping! Check src/kaldi.mk and src/configure' && exit 1; }
mkdir -p $dir/log
###### PREPARE FEATURES ######
...
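The added guard relies on the Kaldi helper binary cuda-compiled, which exits with status 0 only when the toolkit was configured with CUDA; the shell idiom "cmd || { echo ...; exit 1; }" makes the script abort early with a clear message instead of failing later inside GPU code. For illustration, a minimal Python sketch of the same check (not part of this commit; it assumes cuda-compiled is on the PATH):

import subprocess
import sys

def check_cuda_compiled():
    # cuda-compiled exits with 0 iff Kaldi was built with CUDA support.
    try:
        ret = subprocess.call(["cuda-compiled"])
    except OSError:
        sys.exit("cuda-compiled not found; is Kaldi's src/bin on the PATH?")
    if ret != 0:
        sys.exit("CUDA was not compiled in, check src/kaldi.mk and src/configure")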
@@ -121,6 +121,9 @@ mkdir -p $dir/{log,nnet}
# skip when already trained
[ -e $dir/final.nnet ] && printf "\nSKIPPING TRAINING... ($0)\nnnet already trained : $dir/final.nnet ($(readlink $dir/final.nnet))\n\n" && exit 0
# check if CUDA is compiled in,
cuda-compiled || { echo 'CUDA was not compiled in, skipping! Check src/kaldi.mk and src/configure' && exit 1; }
###### PREPARE ALIGNMENTS ######
echo
echo "# PREPARING ALIGNMENTS"
...
@@ -52,12 +52,14 @@ srcdir=$3
alidir=$4
denlatdir=$5
dir=$6
mkdir -p $dir/log
for f in $data/feats.scp $alidir/{tree,final.mdl,ali.1.gz} $denlatdir/lat.scp $srcdir/{final.nnet,final.feature_transform}; do
[ ! -f $f ] && echo "$0: no such file $f" && exit 1;
done
# check if CUDA is compiled in,
cuda-compiled || { echo 'CUDA was not compiled in, skipping! Check src/kaldi.mk and src/configure' && exit 1; }
mkdir -p $dir/log
cp $alidir/{final.mdl,tree} $dir
@@ -65,7 +67,6 @@ cp $alidir/{final.mdl,tree} $dir
silphonelist=`cat $lang/phones/silence.csl` || exit 1;
# Get the files we will need
nnet=$srcdir/$(readlink $srcdir/final.nnet || echo final.nnet);
[ -z "$nnet" ] && echo "Error nnet '$nnet' does not exist!" && exit 1;
...
@@ -52,12 +52,14 @@ srcdir=$3
alidir=$4
denlatdir=$5
dir=$6
mkdir -p $dir/log
for f in $data/feats.scp $alidir/{tree,final.mdl,ali.1.gz} $denlatdir/lat.scp $srcdir/{final.nnet,final.feature_transform}; do
[ ! -f $f ] && echo "$0: no such file $f" && exit 1;
done
# check if CUDA is compiled in,
cuda-compiled || { echo 'CUDA was not compiled in, skipping! Check src/kaldi.mk and src/configure' && exit 1; }
mkdir -p $dir/log
cp $alidir/{final.mdl,tree} $dir
...
@@ -84,6 +84,7 @@ def Glorot(dim1, dim2):
### Print prototype of the network
###
# NO HIDDEN LAYER, ADDING BOTTLENECK!
# No hidden layer while adding a bottleneck means:
# - add bottleneck layer + hidden layer + output layer
if num_hid_layers == 0 and o.bottleneck_dim != 0:
@@ -110,6 +111,7 @@ if num_hid_layers == 0 and o.bottleneck_dim != 0:
# We are done!
sys.exit(0)
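For intuition, a hedged sketch of the component sequence this branch emits: a linear bottleneck, one hidden layer, and the output layer. The dimensions are hypothetical and the initialization attributes are omitted; the exact line format in make_nnet_proto.py may differ:

feat_dim, bn_dim, num_hid_neurons, num_tgt = 440, 80, 1024, 3000  # hypothetical dims
print("<LinearTransform> <InputDim> %d <OutputDim> %d" % (feat_dim, bn_dim))          # bottleneck
print("<AffineTransform> <InputDim> %d <OutputDim> %d" % (bn_dim, num_hid_neurons))   # hidden layer
print("<Sigmoid> <InputDim> %d <OutputDim> %d" % (num_hid_neurons, num_hid_neurons))
print("<AffineTransform> <InputDim> %d <OutputDim> %d" % (num_hid_neurons, num_tgt))  # output layer
print("<Softmax> <InputDim> %d <OutputDim> %d" % (num_tgt, num_tgt))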
# NO HIDDEN LAYERS!
# Add only last layer (logistic regression)
if num_hid_layers == 0:
if o.with_proto_head : print "<NnetProto>"
@@ -121,29 +123,33 @@ if num_hid_layers == 0:
# We are done!
sys.exit(0)
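With no hidden layers and no bottleneck the network degenerates to multiclass logistic regression: a single AffineTransform from the features to the targets, followed by a Softmax. A hedged sketch with hypothetical dimensions (initialization attributes again omitted):

feat_dim, num_tgt = 440, 3000  # hypothetical dims
print("<AffineTransform> <InputDim> %d <OutputDim> %d" % (feat_dim, num_tgt))
print("<Softmax> <InputDim> %d <OutputDim> %d" % (num_tgt, num_tgt))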
# Assuming we have >0 hidden layers
# THE USUAL DNN PROTOTYPE STARTS HERE!
# Assuming we have >0 hidden layers,
assert(num_hid_layers > 0)
# Begin the prototype
# Begin the prototype,
if o.with_proto_head : print "<NnetProto>"
# First AffineTransform
# First AffineTransform,
print "<AffineTransform> <InputDim> %d <OutputDim> %d <BiasMean> %f <BiasRange> %f <ParamStddev> %f <MaxNorm> %f" % \
(feat_dim, num_hid_neurons, o.hid_bias_mean, o.hid_bias_range, \
(o.param_stddev_factor * Glorot(feat_dim, num_hid_neurons) * \
(math.sqrt(1.0/12.0) if o.smaller_input_weights else 1.0)), o.max_norm)
# stddev(U[0,1]) = sqrt(1/12); reducing the stddev of the weights,
# the dynamic range of the input data is larger than that of a Sigmoid.
# Note: compensating the dynamic-range mismatch between the input features and Sigmoid hidden layers,
# i.e. mapping the std-dev of N(0,1) (input features) to the std-dev of U[0,1] (sigmoid outputs).
# This is done by multiplying with stddev(U[0,1]) = sqrt(1/12).
# The stddev of the weights is consequently reduced to ~0.29x.
print "%s <InputDim> %d <OutputDim> %d" % (o.activation_type, num_hid_neurons, num_hid_neurons)
# Internal AffineTransforms
# Internal AffineTransforms,
for i in range(num_hid_layers-1):
print "<AffineTransform> <InputDim> %d <OutputDim> %d <BiasMean> %f <BiasRange> %f <ParamStddev> %f <MaxNorm> %f" % \
(num_hid_neurons, num_hid_neurons, o.hid_bias_mean, o.hid_bias_range, \
(o.param_stddev_factor * Glorot(num_hid_neurons, num_hid_neurons)), o.max_norm)
print "%s <InputDim> %d <OutputDim> %d" % (o.activation_type, num_hid_neurons, num_hid_neurons)
# Optionally add bottleneck
# Optionally add bottleneck,
if o.bottleneck_dim != 0:
assert(o.bottleneck_dim > 0)
# 25% smaller stddev -> small bottleneck range, 10x smaller learning rate
...
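A hedged sketch of the bottleneck insertion described above: a low-rank LinearTransform whose weights get a 25% smaller stddev and a 10x smaller learning-rate coefficient. The dimensions, the base stddev, and the exact attribute layout are assumptions for illustration:

import math

num_hid_neurons, bn_dim = 1024, 80  # hypothetical dims
base_stddev = math.sqrt(6.0 / (num_hid_neurons + bn_dim))  # hypothetical Glorot-style base
print("<LinearTransform> <InputDim> %d <OutputDim> %d <ParamStddev> %f <LearnRateCoef> %f"
      % (num_hid_neurons, bn_dim, 0.75 * base_stddev, 0.1))  # 25% smaller stddev, 10x smaller lrate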