Commit 19adf207 authored by Karel Vesely

trunk,nnet1:

- updating the CNN recipe (results not better than DNN)
- removing old and unused initialization scripts



git-svn-id: https://svn.code.sf.net/p/kaldi/code/trunk@4470 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
parent 866fafc1
......@@ -163,20 +163,21 @@ exit 0
%WER 1.36 [ 170 / 12533, 15 ins, 34 del, 121 sub ] exp/nnet5e_mpe_gpu/decode_epoch2/wer_3
%WER 7.73 [ 969 / 12533, 74 ins, 157 del, 738 sub ] exp/nnet5e_mpe_gpu/decode_ug_epoch4/wer_9
# DNN systems (Karel)
# note from Dan-- these are from an older RESULTS file as I did not rerun these
# last time I created this.
# Per-frame cross-entropy training
%WER 1.66 [ 208 / 12533, 27 ins, 49 del, 132 sub ] exp/dnn4b_pretrain-dbn_dnn/decode/wer_3
%WER 7.80 [ 978 / 12533, 83 ins, 151 del, 744 sub ] exp/dnn4b_pretrain-dbn_dnn/decode_ug/wer_6
# DNN systems (Karel - 25.9.2014)
# Per-frame cross-entropy training
%WER 1.63 [ 204 / 12533, 32 ins, 42 del, 130 sub ] exp/dnn4b_pretrain-dbn_dnn/decode/wer_3
%WER 7.77 [ 974 / 12533, 81 ins, 158 del, 735 sub ] exp/dnn4b_pretrain-dbn_dnn/decode_ug/wer_7
# Sequence-based sMBR training
%WER 1.64 [ 206 / 12533, 24 ins, 49 del, 133 sub ] exp/dnn4b_pretrain-dbn_dnn_smbr/decode_it1/wer_4
%WER 1.62 [ 203 / 12533, 25 ins, 46 del, 132 sub ] exp/dnn4b_pretrain-dbn_dnn_smbr/decode_it2/wer_4
%WER 1.59 [ 199 / 12533, 25 ins, 42 del, 132 sub ] exp/dnn4b_pretrain-dbn_dnn_smbr/decode_it3/wer_4
%WER 1.60 [ 201 / 12533, 35 ins, 33 del, 133 sub ] exp/dnn4b_pretrain-dbn_dnn_smbr/decode_it4/wer_3
%WER 1.58 [ 198 / 12533, 31 ins, 37 del, 130 sub ] exp/dnn4b_pretrain-dbn_dnn_smbr/decode_it5/wer_4
%WER 1.59 [ 199 / 12533, 31 ins, 37 del, 131 sub ] exp/dnn4b_pretrain-dbn_dnn_smbr/decode_it6/wer_4
%WER 1.61 [ 202 / 12533, 32 ins, 42 del, 128 sub ] exp/dnn4b_pretrain-dbn_dnn_smbr/decode_it1/wer_3
%WER 1.62 [ 203 / 12533, 33 ins, 42 del, 128 sub ] exp/dnn4b_pretrain-dbn_dnn_smbr/decode_it2/wer_3
%WER 1.63 [ 204 / 12533, 32 ins, 42 del, 130 sub ] exp/dnn4b_pretrain-dbn_dnn_smbr/decode_it3/wer_3
%WER 1.64 [ 206 / 12533, 32 ins, 42 del, 132 sub ] exp/dnn4b_pretrain-dbn_dnn_smbr/decode_it4/wer_3
%WER 1.63 [ 204 / 12533, 32 ins, 41 del, 131 sub ] exp/dnn4b_pretrain-dbn_dnn_smbr/decode_it5/wer_3
%WER 1.64 [ 206 / 12533, 20 ins, 58 del, 128 sub ] exp/dnn4b_pretrain-dbn_dnn_smbr/decode_it6/wer_5
# CNN systems (Karel - 25.9.2014)
%WER 1.89 [ 237 / 12533, 30 ins, 47 del, 160 sub ] exp/cnn4c/decode/wer_3 # per-frame training
# Some system combination experiments.
......
--window-type=hamming # disable Dan's window, use the standard one
--use-energy=false # only fbank outputs
--dither=1
--num-mel-bins=40 # 8 filters/octave, 40 filters for 16kHz audio, as used by IBM
--htk-compat=true # try to make it compatible with HTK
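# (these options are presumably picked up by compute-fbank-feats through the fbank config
#  used by steps/make_fbank_pitch.sh in the recipe below)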
#!/bin/bash
. ./cmd.sh ## You'll want to change cmd.sh to something that will work on your system.
## This relates to the queue.
. ./path.sh ## Source the tools/utils (import the queue.pl)
dev=data-fbank/test
train=data-fbank/train
dev_original=data/test
train_original=data/train
gmm=exp/tri3b
stage=0
. utils/parse_options.sh
# Make the FBANK features
if [ $stage -le 0 ]; then
# Dev set
mkdir -p $dev && cp $dev_original/* $dev && rm $dev/{feats,cmvn}.scp
steps/make_fbank_pitch.sh --nj 10 --cmd "$train_cmd" \
$dev $dev/log $dev/data || exit 1;
steps/compute_cmvn_stats.sh $dev $dev/log $dev/data || exit 1;
# Training set
mkdir -p $train && cp $train_original/* $train && rm $train/{feats,cmvn}.scp
steps/make_fbank_pitch.sh --nj 10 --cmd "$train_cmd" \
$train $train/log $train/data || exit 1;
steps/compute_cmvn_stats.sh $train $train/log $train/data || exit 1;
# Split the training set
utils/subset_data_dir_tr_cv.sh --cv-spk-percent 10 $train ${train}_tr90 ${train}_cv10
fi
# Run the CNN pre-training.
if [ $stage -le 1 ]; then
dir=exp/cnn4c
ali=${gmm}_ali
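# Note: '--prepend-cnn-type cnn1d' makes train.sh build a 1-D convolutional front-end, and the
# '--cnn-proto-opts' string is forwarded to utils/nnet/make_cnn_proto.py (presumably: patch-dim1
# is the filter size along the frequency axis, pitch-dim is the number of trailing pitch features
# kept out of the convolution).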
# Train
$cuda_cmd $dir/log/train_nnet.log \
steps/nnet/train.sh \
--apply-cmvn true --norm-vars true --delta-order 2 --splice 5 \
--prepend-cnn-type cnn1d --cnn-proto-opts "--patch-dim1 8 --pitch-dim 3" \
--hid-layers 2 --learn-rate 0.008 --train-opts "--verbose 2" \
${train}_tr90 ${train}_cv10 data/lang $ali $ali $dir || exit 1;
# Decode
steps/nnet/decode.sh --nj 20 --cmd "$decode_cmd" --config conf/decode_dnn.config --acwt 0.2 \
$gmm/graph $dev $dir/decode || exit 1;
steps/nnet/decode.sh --nj 20 --cmd "$decode_cmd" --config conf/decode_dnn.config --acwt 0.2 \
$gmm/graph_ug $dev $dir/decode_ug || exit 1;
fi
# Pre-train stack of RBMs on top of the convolutional layers (4 layers, 1024 units)
if [ $stage -le 2 ]; then
dir=exp/cnn4c_pretrain-dbn
transf_cnn=exp/cnn4c/final.feature_transform_cnn # transform with convolutional layers
# Train
$cuda_cmd $dir/log/pretrain_dbn.log \
steps/nnet/pretrain_dbn.sh --nn-depth 4 --hid-dim 1024 --rbm-iter 20 \
--feature-transform $transf_cnn --input-vis-type bern \
--param-stddev-first 0.05 --param-stddev 0.05 \
$train $dir || exit 1
fi
# Re-align using CNN
if [ $stage -le 3 ]; then
dir=exp/cnn4c
steps/nnet/align.sh --nj 20 --cmd "$train_cmd" \
$train data/lang $dir ${dir}_ali || exit 1
fi
# Train the DNN optimizing cross-entropy.
if [ $stage -le 4 ]; then
dir=exp/cnn4c_pretrain-dbn_dnn; [ ! -d $dir ] && mkdir -p $dir/log;
ali=exp/cnn4c_ali
feature_transform=exp/cnn4c/final.feature_transform
feature_transform_dbn=exp/cnn4c_pretrain-dbn/final.feature_transform
dbn=exp/cnn4c_pretrain-dbn/4.dbn
cnn_dbn=$dir/cnn_dbn.nnet
{ # Concatenate CNN layers and DBN,
num_components=$(nnet-info $feature_transform | grep -m1 num-components | awk '{print $2;}')
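# strip the first $num_components components (the plain feature transform shared with exp/cnn4c)
# from the DBN feature transform, keeping only the convolutional layers, then append the DBN: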
nnet-concat "nnet-copy --remove-first-layers=$num_components $feature_transform_dbn - |" $dbn $cnn_dbn \
2>$dir/log/concat_cnn_dbn.log || exit 1
}
# Train
$cuda_cmd $dir/log/train_nnet.log \
steps/nnet/train.sh --feature-transform $feature_transform --dbn $cnn_dbn --hid-layers 0 \
${train}_tr90 ${train}_cv10 data/lang $ali $ali $dir || exit 1;
# Decode (reuse HCLG graph)
steps/nnet/decode.sh --nj 20 --cmd "$decode_cmd" --config conf/decode_dnn.config --acwt 0.2 \
$gmm/graph $dev $dir/decode || exit 1;
steps/nnet/decode.sh --nj 20 --cmd "$decode_cmd" --config conf/decode_dnn.config --acwt 0.2 \
$gmm/graph_ug $dev $dir/decode_ug || exit 1;
fi
# Sequence training using the sMBR criterion; we do stochastic GD
# with per-utterance updates. For RM a good acwt is 0.2.
dir=exp/cnn4c_pretrain-dbn_dnn_smbr
srcdir=exp/cnn4c_pretrain-dbn_dnn
acwt=0.2
# First we generate lattices and alignments:
if [ $stage -le 4 ]; then
steps/nnet/align.sh --nj 20 --cmd "$train_cmd" \
$train data/lang $srcdir ${srcdir}_ali || exit 1;
steps/nnet/make_denlats.sh --nj 20 --cmd "$decode_cmd" --config conf/decode_dnn.config --acwt $acwt \
$train data/lang $srcdir ${srcdir}_denlats || exit 1;
fi
# Re-train the DNN by 6 iterations of sMBR
if [ $stage -le 5 ]; then
steps/nnet/train_mpe.sh --cmd "$cuda_cmd" --num-iters 6 --acwt $acwt --do-smbr true \
$train data/lang $srcdir ${srcdir}_ali ${srcdir}_denlats $dir || exit 1
# Decode
for ITER in 1 2 3 4 5 6; do
steps/nnet/decode.sh --nj 20 --cmd "$decode_cmd" --config conf/decode_dnn.config \
--nnet $dir/${ITER}.nnet --acwt $acwt \
$gmm/graph $dev $dir/decode_it${ITER} || exit 1
done
fi
echo Success
exit 0
......@@ -21,7 +21,7 @@
. ./path.sh ## Source the tools/utils (import the queue.pl)
# Config:
gmmdir=exp/tri3b
gmm=exp/tri3b
data_fmllr=data-fmllr-tri3b
stage=0 # resume training with --stage=N
# End of config.
......@@ -33,13 +33,13 @@ if [ $stage -le 0 ]; then
# test
dir=$data_fmllr/test
steps/nnet/make_fmllr_feats.sh --nj 10 --cmd "$train_cmd" \
--transform-dir $gmmdir/decode \
$dir data/test $gmmdir $dir/log $dir/data || exit 1
--transform-dir $gmm/decode \
$dir data/test $gmm $dir/log $dir/data || exit 1
# train
dir=$data_fmllr/train
steps/nnet/make_fmllr_feats.sh --nj 10 --cmd "$train_cmd" \
--transform-dir ${gmmdir}_ali \
$dir data/train $gmmdir $dir/log $dir/data || exit 1
--transform-dir ${gmm}_ali \
$dir data/train $gmm $dir/log $dir/data || exit 1
# split the data : 90% train 10% cross-validation (held-out)
utils/subset_data_dir_tr_cv.sh $dir ${dir}_tr90 ${dir}_cv10 || exit 1
fi
......@@ -55,7 +55,7 @@ fi
if [ $stage -le 2 ]; then
# Train the DNN optimizing per-frame cross-entropy.
dir=exp/dnn4b_pretrain-dbn_dnn
ali=${gmmdir}_ali
ali=${gmm}_ali
feature_transform=exp/dnn4b_pretrain-dbn/final.feature_transform
dbn=exp/dnn4b_pretrain-dbn/6.dbn
(tail --pid=$$ -F $dir/log/train_nnet.log 2>/dev/null)& # forward log
......@@ -64,18 +64,18 @@ if [ $stage -le 2 ]; then
steps/nnet/train.sh --feature-transform $feature_transform --dbn $dbn --hid-layers 0 --learn-rate 0.008 \
$data_fmllr/train_tr90 $data_fmllr/train_cv10 data/lang $ali $ali $dir || exit 1;
# Decode (reuse HCLG graph)
steps/nnet/decode.sh --nj 20 --cmd "$decode_cmd" --config conf/decode_dnn.config --acwt 0.1 \
$gmmdir/graph $data_fmllr/test $dir/decode || exit 1;
steps/nnet/decode.sh --nj 20 --cmd "$decode_cmd" --config conf/decode_dnn.config --acwt 0.1 \
$gmmdir/graph_ug $data_fmllr/test $dir/decode_ug || exit 1;
steps/nnet/decode.sh --nj 20 --cmd "$decode_cmd" --config conf/decode_dnn.config --acwt 0.2 \
$gmm/graph $data_fmllr/test $dir/decode || exit 1;
steps/nnet/decode.sh --nj 20 --cmd "$decode_cmd" --config conf/decode_dnn.config --acwt 0.2 \
$gmm/graph_ug $data_fmllr/test $dir/decode_ug || exit 1;
fi
# Sequence training using sMBR criterion, we do Stochastic-GD
# with per-utterance updates. We use usually good acwt 0.1
# with per-utterance updates. For RM good acwt is 0.2
dir=exp/dnn4b_pretrain-dbn_dnn_smbr
srcdir=exp/dnn4b_pretrain-dbn_dnn
acwt=0.1
acwt=0.2
if [ $stage -le 3 ]; then
# First we generate lattices and alignments:
......@@ -93,10 +93,7 @@ if [ $stage -le 4 ]; then
for ITER in 1 2 3 4 5 6; do
steps/nnet/decode.sh --nj 20 --cmd "$decode_cmd" --config conf/decode_dnn.config \
--nnet $dir/${ITER}.nnet --acwt $acwt \
$gmmdir/graph $data_fmllr/test $dir/decode_it${ITER} || exit 1
steps/nnet/decode.sh --nj 20 --cmd "$decode_cmd" --config conf/decode_dnn.config \
--nnet $dir/${ITER}.nnet --acwt $acwt \
$gmmdir/graph_ug $data_fmllr/test $dir/decode_ug_it${ITER} || exit 1
$gmm/graph $data_fmllr/test $dir/decode_it${ITER} || exit 1
done
fi
......
#!/bin/bash
# Copyright 2012-2014 Brno University of Technology (Author: Karel Vesely)
# Apache 2.0
# This example script trains a DNN on top of FBANK+pitch features.
# The training is done in 3 stages,
#
# 1) RBM pre-training:
# in this unsupervised stage we train stack of RBMs,
# a good starting point for frame cross-entropy training.
# 2) frame cross-entropy training:
# the objective is to classify frames to correct pdfs.
# 3) sequence-training optimizing sMBR:
# the objective is to emphasize state-sequences with better
# frame accuracy w.r.t. reference alignment.
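# Stage map of this script (from the code below):
#   0 = make fbank+pitch features, 1 = DBN pre-training, 2 = frame cross-entropy training,
#   3 = lattice/alignment generation, 4 = sMBR sequence training.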
. ./cmd.sh ## You'll want to change cmd.sh to something that will work on your system.
## This relates to the queue.
. ./path.sh ## Source the tools/utils (import the queue.pl)
dev=data-fbank/test
train=data-fbank/train
dev_original=data/test
train_original=data/train
gmm=exp/tri3b
stage=0
. utils/parse_options.sh || exit 1;
# Make the FBANK features
if [ $stage -le 0 ]; then
# Dev set
mkdir -p $dev && cp $dev_original/* $dev
steps/make_fbank_pitch.sh --nj 10 --cmd "$train_cmd" \
$dev $dev/log $dev/data || exit 1;
steps/compute_cmvn_stats.sh $dev $dev/log $dev/data || exit 1;
# Training set
mkdir -p $train && cp $train_original/* $train
steps/make_fbank_pitch.sh --nj 10 --cmd "$train_cmd -tc 10" \
$train $train/log $train/data || exit 1;
steps/compute_cmvn_stats.sh $train $train/log $train/data || exit 1;
# Split the training set
utils/subset_data_dir_tr_cv.sh --cv-spk-percent 10 $train ${train}_tr90 ${train}_cv10
fi
if [ $stage -le 1 ]; then
# Pre-train DBN, i.e. a stack of RBMs (small database, smaller DNN)
dir=exp/dnn4c_pretrain-dbn
(tail --pid=$$ -F $dir/log/pretrain_dbn.log 2>/dev/null)& # forward log
$cuda_cmd $dir/log/pretrain_dbn.log \
steps/nnet/pretrain_dbn.sh \
--apply-cmvn true --norm-vars true --delta-order 2 --splice 5 \
--hid-dim 1024 --rbm-iter 20 $train $dir || exit 1;
fi
if [ $stage -le 2 ]; then
# Train the DNN optimizing per-frame cross-entropy.
dir=exp/dnn4c_pretrain-dbn_dnn
ali=${gmm}_ali
feature_transform=exp/dnn4c_pretrain-dbn/final.feature_transform
dbn=exp/dnn4c_pretrain-dbn/6.dbn
(tail --pid=$$ -F $dir/log/train_nnet.log 2>/dev/null)& # forward log
# Train
$cuda_cmd $dir/log/train_nnet.log \
steps/nnet/train.sh --feature-transform $feature_transform --dbn $dbn --hid-layers 0 --learn-rate 0.008 \
${train}_tr90 ${train}_cv10 data/lang $ali $ali $dir || exit 1;
# Decode (reuse HCLG graph)
steps/nnet/decode.sh --nj 20 --cmd "$decode_cmd" --config conf/decode_dnn.config --acwt 0.1 \
$gmm/graph $dev $dir/decode || exit 1;
steps/nnet/decode.sh --nj 20 --cmd "$decode_cmd" --config conf/decode_dnn.config --acwt 0.1 \
$gmm/graph_ug $dev $dir/decode_ug || exit 1;
fi
# Sequence training using the sMBR criterion; we do stochastic GD
# with per-utterance updates. We use acwt 0.1, which is usually a good value.
dir=exp/dnn4c_pretrain-dbn_dnn_smbr
srcdir=exp/dnn4c_pretrain-dbn_dnn
acwt=0.1
if [ $stage -le 3 ]; then
# First we generate lattices and alignments:
steps/nnet/align.sh --nj 20 --cmd "$train_cmd" \
$train data/lang $srcdir ${srcdir}_ali || exit 1;
steps/nnet/make_denlats.sh --nj 20 --cmd "$decode_cmd" --config conf/decode_dnn.config --acwt $acwt \
$train data/lang $srcdir ${srcdir}_denlats || exit 1;
fi
if [ $stage -le 4 ]; then
# Re-train the DNN by 6 iterations of sMBR
steps/nnet/train_mpe.sh --cmd "$cuda_cmd" --num-iters 6 --acwt $acwt --do-smbr true \
$train data/lang $srcdir ${srcdir}_ali ${srcdir}_denlats $dir || exit 1
# Decode
for ITER in 1 2 3 4 5 6; do
steps/nnet/decode.sh --nj 20 --cmd "$decode_cmd" --config conf/decode_dnn.config \
--nnet $dir/${ITER}.nnet --acwt $acwt \
$gmm/graph $dev $dir/decode_it${ITER} || exit 1
done
fi
echo Success
exit 0
# Getting results [see RESULTS file]
# for x in exp/*/decode*; do [ -d $x ] && grep WER $x/wer_* | utils/best_wer.sh; done
......@@ -229,5 +229,7 @@ local/run_sgmm2.sh
# local/run_nnet2.sh
# Karel's neural net recipe.
# local/run_dnn.sh
# local/nnet/run_dnn.sh
# Karel's CNN recipe.
# local/nnet/run_cnn.sh
......@@ -11,8 +11,8 @@ mlp_init= # select initialized MLP (override initialization)
mlp_proto= # select network prototype (initialize it)
proto_opts= # non-default options for 'make_nnet_proto.py'
feature_transform= # provide feature transform (=splice,rescaling,...) (don't build new one)
prepend_cnn=false # create nnet with convolutional layers
cnn_init_opts= # extra options for 'make_cnn_proto.py'
prepend_cnn_type=none # (none,cnn1d,cnn2d) create nnet with convolutional layers
cnn_proto_opts= # extra options for 'make_cnn_proto.py'
#
hid_layers=4 # nr. of hidden layers (prior to softmax or bottleneck)
hid_dim=1024 # select hidden dimension
......@@ -300,6 +300,7 @@ else
ark:- 2>$dir/log/nnet-forward-cmvn.log |\
compute-cmvn-stats ark:- - | cmvn-to-nnet - - |\
nnet-concat --binary=false $feature_transform_old - $feature_transform
[ ! -f $feature_transform ] && cat $dir/log/nnet-forward-cmvn.log && echo "Error: Global CMVN failed, was the CUDA GPU okay?" && echo && exit 1
fi
......@@ -332,16 +333,28 @@ if [[ -z "$mlp_init" && -z "$mlp_proto" ]]; then
# make network prototype
mlp_proto=$dir/nnet.proto
echo "Genrating network prototype $mlp_proto"
if [ $prepend_cnn == "false" ]; then
utils/nnet/make_nnet_proto.py $proto_opts \
${bn_dim:+ --bottleneck-dim=$bn_dim} \
$num_fea $num_tgt $hid_layers $hid_dim >$mlp_proto || exit 1
else
utils/nnet/make_cnn_proto.py $cnn_init_opts \
--splice $splice --delta-order $delta_order --dir $dir \
${bn_dim:+ --bottleneck-dim=$bn_dim} \
$num_fea $num_tgt $hid_layers $hid_dim >$mlp_proto || exit 1
fi
case "$prepend_cnn_type" in
none)
utils/nnet/make_nnet_proto.py $proto_opts \
${bn_dim:+ --bottleneck-dim=$bn_dim} \
$num_fea $num_tgt $hid_layers $hid_dim >$mlp_proto || exit 1
;;
cnn1d)
utils/nnet/make_cnn_proto.py $cnn_proto_opts \
--splice $splice --delta-order $delta_order --dir $dir \
$num_fea >$mlp_proto || exit 1
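# the 5th field of the last non-empty proto line is the output dim of the final CNN component;
# it is used below as the input dim of the fully-connected part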
cnn_fea=$(cat $mlp_proto | grep -v '^$' | tail -n1 | awk '{ print $5; }')
utils/nnet/make_nnet_proto.py $proto_opts \
--no-proto-head --no-smaller-input-weights \
${bn_dim:+ --bottleneck-dim=$bn_dim} \
"$cnn_fea" $num_tgt $hid_layers $hid_dim >>$mlp_proto || exit 1
;;
cnn2d)
#TODO, to be filled by Vijay...
;;
*) echo "Unknown 'prepend-cnn' value $prepend_cnn" && exit 1;
esac
# initialize
mlp_init=$dir/nnet.init; log=$dir/log/nnet_initialize.log
echo "Initializing $mlp_proto -> $mlp_init"
......
#!/usr/bin/python
# Copyright 2010-2013 Brno University of Technology (author: Karel Vesely)
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# ./gen_mlp_init.py
# script generating NN initialization
import math, random
import sys
from optparse import OptionParser
parser = OptionParser()
parser.add_option('--dim', dest='dim', help='d1:d2:d3 layer dimensions in the network')
parser.add_option('--gauss', dest='gauss', help='use gaussian noise for weights', action='store_true', default=False)
parser.add_option('--negbias', dest='negbias', help='use uniform [-4.1,-3.9] for bias (default all 0.0)', action='store_true', default=False)
parser.add_option('--inputscale', dest='inputscale', help='scale the weights by 3/sqrt(Ninputs)', action='store_true', default=False)
parser.add_option('--normalized', dest='normalized', help='Generate normalized weights according to X.Glorot paper, U[-x,x] x=sqrt(6)/(sqrt(dim_in+dim_out))', action='store_true', default=False)
parser.add_option('--activation', dest='activation', help='activation type tag (def. <sigmoid>)', default='<sigmoid>')
parser.add_option('--linBNdim', dest='linBNdim', help='dim of linear bottleneck (sigmoids will be omitted, bias will be zero)',default=0)
parser.add_option('--linOutput', dest='linOutput', help='generate MLP with linear output', action='store_true', default=False)
parser.add_option('--seed', dest='seedval', help='seed for random generator',default=0)
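# Illustrative usage (the layer sizes below are made-up examples):
#   ./gen_mlp_init.py --dim=429:1024:1024:1471 --gauss --negbias --seed=777 > nnet.init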
(options, args) = parser.parse_args()
if(options.dim == None):
parser.print_help()
sys.exit(1)
#seeding
seedval=int(options.seedval)
if(seedval != 0):
random.seed(seedval)
dimStrL = options.dim.split(':')
dimL = []
for i in range(len(dimStrL)):
dimL.append(int(dimStrL[i]))
#print dimL,'linBN',options.linBNdim
print '<Nnet>'
for layer in range(len(dimL)-1):
print '<affinetransform>', dimL[layer+1], dimL[layer]
#precompute the normalized-init (Glorot) range: sqrt(6)/sqrt(fan_in + fan_out)
nomalized_interval = math.sqrt(6.0) / math.sqrt(dimL[layer+1]+dimL[layer])
#weight matrix
print '['
for row in range(dimL[layer+1]):
for col in range(dimL[layer]):
if(options.normalized):
print random.random()*2.0*nomalized_interval - nomalized_interval,
elif(options.gauss):
if(options.inputscale):
print 3/math.sqrt(dimL[layer])*random.gauss(0.0,1.0),
else:
print 0.1*random.gauss(0.0,1.0),
else:
if(options.inputscale):
print (random.random()-0.5)*2*3/math.sqrt(dimL[layer]),
else:
print random.random()/5.0-0.1,
print #newline for each row
print ']'
#bias vector
print '[',
for idx in range(dimL[layer+1]):
if(int(options.linBNdim) == dimL[layer+1]):
print '0.0',
elif(layer == len(dimL)-2):#last layer (softmax)
print '0.0',
elif(options.negbias):
print random.random()/5.0-4.1,
else:
print '0.0',
print ']'
if(int(options.linBNdim) != dimL[layer+1]):
if(layer == len(dimL)-2):
if(not(options.linOutput)) :
print '<softmax>', dimL[layer+1], dimL[layer+1]
else:
#print '<sigmoid>', dimL[layer+1], dimL[layer+1]
print options.activation, dimL[layer+1], dimL[layer+1]
print '</Nnet>'
#!/usr/bin/python
# Copyright 2012 Brno University of Technology (author: Karel Vesely)
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
#
# Initialization of single RBM (``layer'')
#
import math, random
import sys
from optparse import OptionParser
parser = OptionParser()
parser.add_option('--dim', dest='dim', help='d1:d2 layer dimensions in the network')
parser.add_option('--gauss', dest='gauss', help='use gaussian noise for weights', action='store_true', default=False)
parser.add_option('--gauss-scale', dest='gauss_scale', help='standard deviation of the gaussian noise', default='0.1')
parser.add_option('--negbias', dest='negbias', help='use uniform [-4.1,-3.9] for bias (default all 0.0)', action='store_true', default=False)
parser.add_option('--hidtype', dest='hidtype', help='gauss/bern', default='bern')
parser.add_option('--vistype', dest='vistype', help='gauss/bern', default='bern')
parser.add_option('--cmvn-nnet', dest='cmvn_nnet', help='cmvn_nnet to parse mean activation used in visible bias initialization', default='')
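# Illustrative usage (the dimensions below are made-up examples):
#   ./gen_rbm_init.py --dim=429:1024 --gauss --gauss-scale=0.1 --vistype=gauss --hidtype=bern > 1.rbm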
(options, args) = parser.parse_args()
if(options.dim == None):
parser.print_help()
sys.exit(1)
dimStrL = options.dim.split(':')
assert(len(dimStrL) == 2) #only single layer to initialize
dimL = []
for i in range(len(dimStrL)):
dimL.append(int(dimStrL[i]))
gauss_scale=float(options.gauss_scale)
#generate RBM
print '<rbm>', dimL[1], dimL[0]
print options.vistype, options.hidtype
#init weight matrix
print '['
for row in range(dimL[1]):
for col in range(dimL[0]):
if(options.gauss):
print gauss_scale * random.gauss(0.0,1.0),
else:
print (random.random()-0.5)/5.0,
print
print ']'
#init visbias
if len(options.cmvn_nnet)>0:
### use the formula log(p/(1-p)) for visible biases, where p is the mean activity of the neuron
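# e.g. a unit with mean activity p = 0.1 gets visible bias log(0.1/0.9) ~= -2.2, while p = 0.5 gives 0.0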
f = open(options.cmvn_nnet)
#make sure the file starts with <addshift>
line = f.readline()
arr = line.split(' ')
if arr[0] == '<Nnet>': #optionally skip <Nnet>
line = f.readline()
arr = line.split(' ')
if arr[0].lower() != '<addshift>':