#!/bin/bash

. ./cmd.sh ## You'll want to change cmd.sh to something that will work on your system;
           ## this relates to the queue (e.g. run.pl locally, queue.pl on a cluster).

. ./path.sh ## Source the tools/utils (puts the Kaldi binaries and scripts on the PATH).

dev=data_fbank/dev
train=data_fbank/train

dev_original=data/dev
train_original=data/train

gmm=exp/tri5a

stage=0
. utils/parse_options.sh || exit 1;


# Make the FBANK+pitch features
if [ $stage -le 0 ]; then
  # Dev set
  utils/copy_data_dir.sh $dev_original $dev || exit 1; rm $dev/{cmvn,feats}.scp # drop the old feature pointers
  steps/make_fbank_pitch.sh --nj 10 --cmd "$train_cmd" \
     $dev $dev/log $dev/data || exit 1;
  steps/compute_cmvn_stats.sh $dev $dev/log $dev/data || exit 1;
  # Training set
  utils/copy_data_dir.sh $train_original $train || exit 1; rm $train/{cmvn,feats}.scp # drop the old feature pointers
  steps/make_fbank_pitch.sh --nj 10 --cmd "$train_cmd" \
     $train $train/log $train/data || exit 1;
  steps/compute_cmvn_stats.sh $train $train/log $train/data || exit 1;

  # Split the training set: 90% for training, 10% held out for cross-validation
  utils/subset_data_dir_tr_cv.sh --cv-spk-percent 10 $train ${train}_tr90 ${train}_cv10
fi
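
# Optional sanity check: the feature dimension should be the number of filterbank
# channels plus 3 pitch features (the exact number depends on conf/fbank.conf), e.g.:
# feat-to-dim scp:$train/feats.scp -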

# Train the CNN (its convolutional layers are later reused for the DBN pre-training).
if [ $stage -le 1 ]; then
  dir=exp/cnn5c
  ali=${gmm}_ali
  # Train
  $cuda_cmd $dir/log/train_nnet.log \
    steps/nnet/train.sh \
      --cmvn-opts "--norm-means=true --norm-vars=true" \
      --delta-opts "--delta-order=2" --splice 5 \
      --network-type cnn1d --cnn-proto-opts "--patch-dim1 7 --pitch-dim 3" \
      --hid-layers 2 --learn-rate 0.008 \
      ${train}_tr90 ${train}_cv10 data/lang $ali $ali $dir || exit 1;
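
  # Optionally inspect the trained network's topology, e.g.:
  # nnet-info $dir/final.nnet | head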

  # Decode with the trigram language model.
  steps/nnet/decode.sh --nj 10 --cmd "$decode_cmd" \
    --config conf/decode_dnn.config --acwt 0.1 \
    $gmm/graph $dev $dir/decode || exit 1;
fi

# Pre-train a stack of RBMs on top of the convolutional layers (2 layers, 2000 units each)
if [ $stage -le 2 ]; then
  dir=exp/cnn5c_pretrain-dbn
  transf_cnn=exp/cnn5c/final.feature_transform_cnn # transform with convolutional layers
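  # Note: with --input-vis-type bern the first RBM is Bernoulli-Bernoulli, which
  # suits the CNN outputs (they have passed through sigmoids, so they lie in [0,1]).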
  # Train
  $cuda_cmd $dir/log/pretrain_dbn.log \
    steps/nnet/pretrain_dbn.sh --nn-depth 2 --hid-dim 2000 --rbm-iter 1 \
    --feature-transform $transf_cnn --input-vis-type bern \
    --param-stddev-first 0.05 --param-stddev 0.05 \
    $train $dir || exit 1
fi

# Re-align the training data using the CNN (it gives better targets than the GMM alignments)
if [ $stage -le 3 ]; then
  dir=exp/cnn5c
  steps/nnet/align.sh --nj 10 --cmd "$train_cmd" \
    $train data/lang $dir ${dir}_ali || exit 1
fi

# Train the DNN optimizing cross-entropy.
if [ $stage -le 4 ]; then
  dir=exp/cnn5c_pretrain-dbn_dnn; [ ! -d $dir ] && mkdir -p $dir/log;
  ali=exp/cnn5c_ali
  feature_transform=exp/cnn5c/final.feature_transform
  feature_transform_dbn=exp/cnn5c_pretrain-dbn/final.feature_transform
  dbn=exp/cnn5c_pretrain-dbn/2.dbn
  cnn_dbn=$dir/cnn_dbn.nnet
  { # Concatenate the CNN layers and the DBN: strip the leading feature-transform
    # components from $feature_transform_dbn (only the convolutional layers remain),
    # then splice the pre-trained DBN on top of them,
    num_components=$(nnet-info $feature_transform | grep -m1 num-components | awk '{print $2;}')
    nnet-concat "nnet-copy --remove-first-layers=$num_components $feature_transform_dbn - |" $dbn $cnn_dbn \
      2>$dir/log/concat_cnn_dbn.log || exit 1
  }
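  # Optionally verify the concatenated network, e.g.:
  # nnet-info $cnn_dbn | head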
  # Train
  $cuda_cmd $dir/log/train_nnet.log \
    steps/nnet/train.sh --feature-transform $feature_transform --dbn $cnn_dbn --hid-layers 0 \
    ${train}_tr90 ${train}_cv10 data/lang $ali $ali $dir || exit 1;

  # Decode with the trigram language model.
  steps/nnet/decode.sh --nj 10 --cmd "$decode_cmd" \
    --config conf/decode_dnn.config --acwt 0.1 \
    $gmm/graph $dev $dir/decode || exit 1;
fi

# Sequence training using the sMBR criterion; we do stochastic gradient descent
# with per-utterance updates. For RM a good acwt is 0.2 (for WSJ maybe 0.1).
dir=exp/cnn5c_pretrain-dbn_dnn_smbr
srcdir=exp/cnn5c_pretrain-dbn_dnn
acwt=0.1

# First we generate lattices and alignments:
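# The numerator alignments give the reference state sequences, while the denominator
# lattices encode the competing hypotheses that sMBR discriminates against.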
if [ $stage -le 5 ]; then
  steps/nnet/align.sh --nj 10 --cmd "$train_cmd" \
    $train data/lang $srcdir ${srcdir}_ali || exit 1;
  steps/nnet/make_denlats.sh --nj 10 --cmd "$decode_cmd" --config conf/decode_dnn.config --acwt $acwt \
    $train data/lang $srcdir ${srcdir}_denlats || exit 1;
fi

# Re-train the DNN with 2 iterations of sMBR
if [ $stage -le 6 ]; then
  steps/nnet/train_mpe.sh --cmd "$cuda_cmd" --num-iters 2 --acwt $acwt --do-smbr true \
    $train data/lang $srcdir ${srcdir}_ali ${srcdir}_denlats $dir || exit 1
  # Decode
  for ITER in 1 2; do
    steps/nnet/decode.sh --nj 10 --cmd "$decode_cmd" \
      --config conf/decode_dnn.config --acwt $acwt --nnet $dir/${ITER}.nnet \
      $gmm/graph $dev $dir/decode_it${ITER} || exit 1;
  done 
fi

# Re-generate lattices, run 4 more sMBR iterations
dir=exp/cnn5c_pretrain-dbn_dnn_smbr_i1lats
srcdir=exp/cnn5c_pretrain-dbn_dnn_smbr

if [ $stage -le 7 ]; then
  steps/nnet/align.sh --nj 10 --cmd "$train_cmd" \
    $train data/lang $srcdir ${srcdir}_ali || exit 1;
  steps/nnet/make_denlats.sh --nj 10 --cmd "$decode_cmd" --config conf/decode_dnn.config --acwt $acwt \
    $train data/lang $srcdir ${srcdir}_denlats || exit 1;
fi

if [ $stage -le 8 ]; then
  steps/nnet/train_mpe.sh --cmd "$cuda_cmd" --num-iters 4 --acwt $acwt --do-smbr true \
    $train data/lang $srcdir ${srcdir}_ali ${srcdir}_denlats $dir || exit 1
  # Decode
  for ITER in 1 2 3 4; do
    steps/nnet/decode.sh --nj 10 --cmd "$decode_cmd" \
      --config conf/decode_dnn.config --acwt $acwt --nnet $dir/${ITER}.nnet \
      $gmm/graph $dev $dir/decode_it${ITER} || exit 1;
  done 
fi
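
# Getting results: an optional summary of all WERs (this idiom assumes the standard
# Kaldi scoring outputs in each decode directory):
# for x in exp/cnn5c*/decode*; do [ -d $x ] && grep WER $x/wer_* | utils/best_wer.sh; done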

echo Success
exit 0