Commit 5269f0f1 authored by Karel Vesely's avatar Karel Vesely
Browse files

trunk,nnet1 : adding --time-shift to 'nnet-forward',



git-svn-id: https://svn.code.sf.net/p/kaldi/code/trunk@4875 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
parent 1d9b858a
......@@ -53,7 +53,8 @@ if [ $stage -le 1 ]; then
steps/nnet/decode.sh --nj 20 --cmd "$decode_cmd" --config conf/decode_dnn.config --acwt 0.1 \
$gmm/graph $dev $dir/decode || exit 1;
steps/nnet/decode.sh --nj 20 --cmd "$decode_cmd" --config conf/decode_dnn.config --acwt 0.1 \
$gmm/graph_ug $dev $dir/decode_ug || exit 1;
  steps/nnet/decode.sh --nj 20 --cmd "$decode_cmd" --config conf/decode_dnn.config --acwt 0.1 \
    --nnet-forward-opts "--no-softmax=true --prior-scale=1.0 --time-shift=5" \
    $gmm/graph $dev $dir/decode_time-shift5 || exit 1;
fi
# TODO : sequence training,
......
......@@ -19,8 +19,8 @@ randomizer_seed=777
feature_transform=
# learn rate scheduling
max_iters=20
min_iters=
keep_lr_iters=0
min_iters= # keep training, disable weight rejection, start learn-rate halving as usual,
keep_lr_iters=0 # fix learning rate for N initial epochs,
#start_halving_inc=0.5
#end_halving_inc=0.1
start_halving_impr=0.01
......@@ -124,9 +124,10 @@ for iter in $(seq -w $max_iters); do
# accept or reject new parameters (based on objective function)
loss_prev=$loss
if [ 1 == $(bc <<< "$loss_new < $loss") -o $iter -le $keep_lr_iters ]; then
if [ 1 == $(bc <<< "$loss_new < $loss") -o $iter -le $keep_lr_iters -o $iter -le $min_iters ]; then
loss=$loss_new
mlp_best=$dir/nnet/${mlp_base}_iter${iter}_learnrate${learn_rate}_tr$(printf "%.4f" $tr_loss)_cv$(printf "%.4f" $loss_new)
[ $iter -le $min_iters ] && mlp_best=${mlp_best}_min-iters-$min_iters
[ $iter -le $keep_lr_iters ] && mlp_best=${mlp_best}_keep-lr-iters-$keep_lr_iters
mv $mlp_next $mlp_best
echo "nnet accepted ($(basename $mlp_best))"
......
......@@ -54,6 +54,9 @@ int main(int argc, char *argv[]) {
std::string use_gpu="no";
po.Register("use-gpu", &use_gpu, "yes|no|optional, only has effect if compiled with CUDA");
int32 time_shift = 0;
    po.Register("time-shift", &time_shift, "LSTM : repeat last input frame N-times, discard N initial output frames.");
po.Read(argc, argv);
if (po.NumArgs() != 3) {
......@@ -123,7 +126,7 @@ int main(int argc, char *argv[]) {
// iterate over all feature files
for (; !feature_reader.Done(); feature_reader.Next()) {
// read
const Matrix<BaseFloat> &mat = feature_reader.Value();
Matrix<BaseFloat> mat = feature_reader.Value();
KALDI_VLOG(2) << "Processing utterance " << num_done+1
<< ", " << feature_reader.Key()
<< ", " << mat.NumRows() << "frm";
......@@ -133,6 +136,15 @@ int main(int argc, char *argv[]) {
if (!KALDI_ISFINITE(sum)) {
KALDI_ERR << "NaN or inf found in features of " << feature_reader.Key();
}
// time-shift, copy the last frame of LSTM input N-times,
if (time_shift > 0) {
int32 last_row = mat.NumRows() - 1; // last row,
mat.Resize(mat.NumRows() + time_shift, mat.NumCols(), kCopyData);
for (int32 r = last_row+1; r<mat.NumRows(); r++) {
mat.CopyRowFromVec(mat.Row(last_row), r); // copy last row,
}
}
// push it to gpu
feats = mat;
......@@ -150,11 +162,17 @@ int main(int argc, char *argv[]) {
pdf_prior.SubtractOnLogpost(&nnet_out);
}
//download from GPU
// download from GPU
nnet_out_host.Resize(nnet_out.NumRows(), nnet_out.NumCols());
nnet_out.CopyToMat(&nnet_out_host);
//check for NaN/inf
// time-shift, remove N first frames of LSTM output,
if (time_shift > 0) {
Matrix<BaseFloat> tmp(nnet_out_host);
nnet_out_host = tmp.RowRange(time_shift, tmp.NumRows() - time_shift);
}
// check for NaN/inf
for (int32 r = 0; r < nnet_out_host.NumRows(); r++) {
for (int32 c = 0; c < nnet_out_host.NumCols(); c++) {
BaseFloat val = nnet_out_host(r,c);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment