Commit d5ab6a10 authored by Karel Vesely

trunk,nnet1: adding check that CUDA is compiled-in to the NN training scripts,

- utils/nnet/make_nnet_proto.py : adding comments



git-svn-id: https://svn.code.sf.net/p/kaldi/code/trunk@4565 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
parent 860e1c7e
@@ -98,6 +98,9 @@ printf "\t Train-set : $data \n"
 [ -e $dir/${nn_depth}.dbn ] && echo "$0 Skipping, already have $dir/${nn_depth}.dbn" && exit 0
+# check if CUDA is compiled in,
+cuda-compiled || { echo 'CUDA was not compiled in, skipping! Check src/kaldi.mk and src/configure' && exit 1; }
 mkdir -p $dir/log
 ###### PREPARE FEATURES ######
......
@@ -121,6 +121,9 @@ mkdir -p $dir/{log,nnet}
 # skip when already trained
 [ -e $dir/final.nnet ] && printf "\nSKIPPING TRAINING... ($0)\nnnet already trained : $dir/final.nnet ($(readlink $dir/final.nnet))\n\n" && exit 0
+# check if CUDA is compiled in,
+cuda-compiled || { echo 'CUDA was not compiled in, skipping! Check src/kaldi.mk and src/configure' && exit 1; }
 ###### PREPARE ALIGNMENTS ######
 echo
 echo "# PREPARING ALIGNMENTS"
......
@@ -52,12 +52,14 @@ srcdir=$3
 alidir=$4
 denlatdir=$5
 dir=$6
-mkdir -p $dir/log
 for f in $data/feats.scp $alidir/{tree,final.mdl,ali.1.gz} $denlatdir/lat.scp $srcdir/{final.nnet,final.feature_transform}; do
   [ ! -f $f ] && echo "$0: no such file $f" && exit 1;
 done
+# check if CUDA is compiled in,
+cuda-compiled || { echo 'CUDA was not compiled in, skipping! Check src/kaldi.mk and src/configure' && exit 1; }
 mkdir -p $dir/log
 cp $alidir/{final.mdl,tree} $dir
@@ -65,7 +67,6 @@ cp $alidir/{final.mdl,tree} $dir
 silphonelist=`cat $lang/phones/silence.csl` || exit 1;
 #Get the files we will need
 nnet=$srcdir/$(readlink $srcdir/final.nnet || echo final.nnet);
 [ -z "$nnet" ] && echo "Error nnet '$nnet' does not exist!" && exit 1;
......
@@ -52,12 +52,14 @@ srcdir=$3
 alidir=$4
 denlatdir=$5
 dir=$6
-mkdir -p $dir/log
 for f in $data/feats.scp $alidir/{tree,final.mdl,ali.1.gz} $denlatdir/lat.scp $srcdir/{final.nnet,final.feature_transform}; do
   [ ! -f $f ] && echo "$0: no such file $f" && exit 1;
 done
+# check if CUDA is compiled in,
+cuda-compiled || { echo 'CUDA was not compiled in, skipping! Check src/kaldi.mk and src/configure' && exit 1; }
 mkdir -p $dir/log
 cp $alidir/{final.mdl,tree} $dir
......
@@ -84,6 +84,7 @@ def Glorot(dim1, dim2):
 ### Print prototype of the network
 ###
+# NO HIDDEN LAYER, ADDING BOTTLENECK!
 # No hidden layer while adding bottleneck means:
 # - add bottleneck layer + hidden layer + output layer
 if num_hid_layers == 0 and o.bottleneck_dim != 0:
@@ -110,6 +111,7 @@ if num_hid_layers == 0 and o.bottleneck_dim != 0:
   # We are done!
   sys.exit(0)
+# NO HIDDEN LAYERS!
 # Add only last layer (logistic regression)
 if num_hid_layers == 0:
   if o.with_proto_head : print "<NnetProto>"
@@ -121,29 +123,33 @@ if num_hid_layers == 0:
   # We are done!
   sys.exit(0)
-# Assuming we have >0 hidden layers
+# THE USUAL DNN PROTOTYPE STARTS HERE!
+# Assuming we have >0 hidden layers,
 assert(num_hid_layers > 0)
-# Begin the prototype
+# Begin the prototype,
 if o.with_proto_head : print "<NnetProto>"
-# First AffineTranform
+# First AffineTranform,
 print "<AffineTransform> <InputDim> %d <OutputDim> %d <BiasMean> %f <BiasRange> %f <ParamStddev> %f <MaxNorm> %f" % \
   (feat_dim, num_hid_neurons, o.hid_bias_mean, o.hid_bias_range, \
   (o.param_stddev_factor * Glorot(feat_dim, num_hid_neurons) * \
   (math.sqrt(1.0/12.0) if o.smaller_input_weights else 1.0)), o.max_norm)
-# stddev(U[0,1]) = sqrt(1/12); reducing stddev of weights,
-# the dynamic range of input data is larger than of a Sigmoid.
+# Note.: compensating dynamic range mismatch between input features and Sigmoid-hidden layers,
+# i.e. mapping the std-dev of N(0,1) (input features) to std-dev of U[0,1] (sigmoid-outputs).
+# This is done by multiplying with stddev(U[0,1]) = sqrt(1/12).
+# The stddev of weights is consequently reduced by 0.29x.
 print "%s <InputDim> %d <OutputDim> %d" % (o.activation_type, num_hid_neurons, num_hid_neurons)
-# Internal AffineTransforms
+# Internal AffineTransforms,
 for i in range(num_hid_layers-1):
   print "<AffineTransform> <InputDim> %d <OutputDim> %d <BiasMean> %f <BiasRange> %f <ParamStddev> %f <MaxNorm> %f" % \
     (num_hid_neurons, num_hid_neurons, o.hid_bias_mean, o.hid_bias_range, \
     (o.param_stddev_factor * Glorot(num_hid_neurons, num_hid_neurons)), o.max_norm)
   print "%s <InputDim> %d <OutputDim> %d" % (o.activation_type, num_hid_neurons, num_hid_neurons)
-# Optionaly add bottleneck
+# Optionaly add bottleneck,
 if o.bottleneck_dim != 0:
   assert(o.bottleneck_dim > 0)
   # 25% smaller stddev -> small bottleneck range, 10x smaller learning rate
......
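Editorial note on the "0.29x" figure in the new comments: the variance of U[0,1] is 1/12, so its standard deviation is sqrt(1/12) ~= 0.2887, which is the factor applied to the first-layer weight stddev when o.smaller_input_weights is set. Below is a minimal Python sketch of that arithmetic; it is not part of the commit, and the dimensions, the param_stddev_factor value and the Glorot-style helper are illustrative assumptions (the script's real Glorot() definition lies outside the hunk shown above).

import math

# stddev of U[0,1]: Var(U[0,1]) = 1/12, so stddev = sqrt(1/12) ~= 0.2887 -- the "0.29x" in the comment
u01_stddev = math.sqrt(1.0 / 12.0)
print("stddev(U[0,1]) = %.4f" % u01_stddev)

# First-layer ParamStddev as printed by the prototype generator:
#   param_stddev_factor * Glorot(feat_dim, num_hid_neurons) * sqrt(1/12)   (when o.smaller_input_weights is set)
def glorot_scale(dim1, dim2):          # hypothetical stand-in, NOT the script's Glorot()
    return math.sqrt(2.0 / (dim1 + dim2))

feat_dim, num_hid_neurons = 440, 1024  # example dimensions (spliced input, 1024-unit hidden layer)
param_stddev_factor = 0.1              # example value for o.param_stddev_factor
stddev_plain = param_stddev_factor * glorot_scale(feat_dim, num_hid_neurons)
stddev_scaled = stddev_plain * u01_stddev   # roughly 0.29x smaller, matching the added comment
print("first-layer ParamStddev: %.5f -> %.5f" % (stddev_plain, stddev_scaled))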