LINAGORA / LGS / Labs / kaldi-modelgen
Commit 2768f86a authored Mar 20, 2017 by Abdelwahab HEBA
Script used in librispeech and added for research experimentation... not used yet
parent 0e544ebd
Showing 4 changed files with 294 additions and 0 deletions
local/nnet2/run_5a_clean_100.sh (+73, -0)
local/nnet2/run_5c.sh (+63, -0)
local/nnet2/run_6a_clean_460.sh (+79, -0)
local/nnet2/run_7a_960.sh (+79, -0)
local/nnet2/run_5a_clean_100.sh  0 → 100755
#!/bin/bash
# This is p-norm neural net training, with the "fast" script, on top of adapted
# 40-dimensional features.
train_stage=-10
use_gpu=false

. cmd.sh
. ./path.sh
. utils/parse_options.sh

if $use_gpu; then
  if ! cuda-compiled; then
    cat <<EOF && exit 1
This script is intended to be used with GPUs but you have not compiled Kaldi with CUDA
If you want to use GPUs (and have them), go to src/, and configure and make on a machine
where "nvcc" is installed.
EOF
  fi
  parallel_opts="--gpu 1"
  num_threads=1
  minibatch_size=512
  dir=exp/nnet5a_clean_100_gpu
else
  # with just 4 jobs this might be a little slow.
  num_threads=4
  parallel_opts="--num-threads $num_threads"
  minibatch_size=128
  dir=exp/nnet5a_clean_100
fi

. ./cmd.sh
. utils/parse_options.sh

if [ ! -f $dir/final.mdl ]; then
  if [[ $(hostname -f) == *.clsp.jhu.edu ]]; then
    # spread the egs over various machines. will help reduce overload of any
    # one machine.
    utils/create_split_dir.pl /export/b0{1,2,3,4}/$USER/kaldi-data/egs/librispeech/s5/$dir/egs/storage $dir/egs/storage
  fi

  steps/nnet2/train_pnorm_fast.sh --stage $train_stage \
    --samples-per-iter 400000 \
    --parallel-opts "$parallel_opts" \
    --num-threads "$num_threads" \
    --minibatch-size "$minibatch_size" \
    --num-jobs-nnet 4 --mix-up 8000 \
    --initial-learning-rate 0.01 --final-learning-rate 0.001 \
    --num-hidden-layers 4 \
    --pnorm-input-dim 2000 --pnorm-output-dim 400 \
    --cmd "$decode_cmd" \
    data/train_clean_100 data/lang exp/tri4b_ali_clean_100 $dir || exit 1
fi

for test in meeting_test; do
  steps/nnet2/decode.sh --nj 2 --cmd "$decode_cmd" \
    --transform-dir exp/tri4b/decode_tgsmall_$test \
    $exp_data/tri3b/graph_tglarge $idata_kaldi/$test $exp_data/nn2/decode_tglarge_$test
  #steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \
  #  data/$test $dir/decode_{tgsmall,tgmed}_$test || exit 1;
  #steps/lmrescore_const_arpa.sh \
  #  --cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \
  #  data/$test $dir/decode_{tgsmall,tglarge}_$test || exit 1;
  #steps/lmrescore_const_arpa.sh \
  #  --cmd "$decode_cmd" data/lang_test_{tgsmall,fglarge} \
  #  data/$test $dir/decode_{tgsmall,fglarge}_$test || exit 1;
done

exit 0;
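A minimal invocation sketch for the file above, assuming the usual Kaldi behavior of utils/parse_options.sh (variables defined before it is sourced, here train_stage and use_gpu, become --train-stage / --use-gpu command-line options) and assuming $exp_data and $idata_kaldi, which the decode loop reads but never sets, are provided by cmd.sh or the calling environment:

# Sketch only: run from the recipe's s5 directory so cmd.sh and path.sh resolve.
# CPU training with the script's defaults (4 threads, minibatch 128):
local/nnet2/run_5a_clean_100.sh --use-gpu false --train-stage -10
# Resume an interrupted GPU run at a later iteration (illustrative stage number):
local/nnet2/run_5a_clean_100.sh --use-gpu true --train-stage 50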
local/nnet2/run_5c.sh  0 → 100755
#!/bin/bash
# This is neural net training on top of adapted 40-dimensional features.
#
train_stage=-10
use_gpu=true

train_set="train-clean-100"
test_sets="dev-clean dev-other"

. cmd.sh
. ./path.sh
. utils/parse_options.sh

if $use_gpu; then
  if ! cuda-compiled; then
    cat <<EOF && exit 1
This script is intended to be used with GPUs but you have not compiled Kaldi with CUDA
If you want to use GPUs (and have them), go to src/, and configure and make on a machine
where "nvcc" is installed.
EOF
  fi
  parallel_opts="--gpu 1"
  num_threads=1
  minibatch_size=512
  dir=exp/nnet5c_gpu_${train_set}
else
  num_threads=16
  parallel_opts="-pe smp $num_threads"
  dir=exp/nnet5c_${train_set}
  minibatch_size=128
fi

if [ ! -f $dir/final.mdl ]; then
  if [ "$USER" == dpovey ]; then
    # spread the egs over various machines. will help reduce overload of any
    # one machine.
    utils/create_split_dir.pl /export/b0{1,2,3,4}/dpovey/kaldi-pure/egs/wsj/s5/$dir/egs $dir/egs/storage
  fi

  steps/nnet2/train_tanh_fast.sh --stage $train_stage \
    --num-threads "$num_threads" \
    --parallel-opts "$parallel_opts" \
    --minibatch-size "$minibatch_size" \
    --num-jobs-nnet 8 \
    --samples-per-iter 400000 \
    --mix-up 8000 \
    --initial-learning-rate 0.01 --final-learning-rate 0.001 \
    --num-hidden-layers 4 --hidden-layer-dim 1024 \
    --cmd "$decode_cmd" \
    data/$train_set data/lang exp/tri4b_ali_${train_set} $dir || exit 1
fi

for test in $test_sets; do
  steps/nnet2/decode.sh --nj 20 --cmd "$decode_cmd" \
    --transform-dir exp/tri4b/decode_tgpr_$test \
    exp/tri4b/graph_tgpr data/$test $dir/decode_tgpr_$test || exit 1;
done

wait

exit 0
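As above, a sketch under the utils/parse_options.sh convention; --train-set and --test-sets override the defaults declared at the top, and the named directories are assumed to exist under data/ with a matching exp/tri4b_ali_* alignment directory:

# Sketch only: tanh training on the 100-hour clean set, then decoding both dev sets.
local/nnet2/run_5c.sh --use-gpu true \
  --train-set "train-clean-100" --test-sets "dev-clean dev-other"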
local/nnet2/run_6a_clean_460.sh  0 → 100755
#!/bin/bash
# This is p-norm neural net training, with the "fast" script, on top of adapted
# 40-dimensional features.
# This version uses 460 hours of "clean" (typically relatively un-accented)
# training data.
# We're using 6 jobs rather than 4, for speed.
# Note: we highly discourage running this with --use-gpu false, it will
# take way too long.
train_stage=-10
use_gpu=true

. cmd.sh
. ./path.sh
. utils/parse_options.sh

if $use_gpu; then
  if ! cuda-compiled; then
    cat <<EOF && exit 1
This script is intended to be used with GPUs but you have not compiled Kaldi with CUDA
If you want to use GPUs (and have them), go to src/, and configure and make on a machine
where "nvcc" is installed.
EOF
  fi
  parallel_opts="--gpu 1"
  num_threads=1
  minibatch_size=512
  dir=exp/nnet6a_clean_460_gpu
else
  # with just 4 jobs this might be a little slow.
  num_threads=16
  parallel_opts="-pe smp $num_threads"
  minibatch_size=128
  dir=exp/nnet6a_clean_460
fi

. ./cmd.sh
. utils/parse_options.sh

if [ ! -f $dir/final.mdl ]; then
  if [[ $(hostname -f) == *.clsp.jhu.edu ]]; then
    # spread the egs over various machines. will help reduce overload of any
    # one machine.
    utils/create_split_dir.pl /export/b0{1,2,3,4}/$USER/kaldi-data/egs/librispeech/s5/$dir/egs/storage $dir/egs/storage
  fi

  steps/nnet2/train_pnorm_fast.sh --stage $train_stage \
    --samples-per-iter 400000 \
    --num-epochs 7 --num-epochs-extra 3 \
    --parallel-opts "$parallel_opts" \
    --num-threads "$num_threads" \
    --minibatch-size "$minibatch_size" \
    --num-jobs-nnet 6 --mix-up 10000 \
    --initial-learning-rate 0.01 --final-learning-rate 0.001 \
    --num-hidden-layers 4 \
    --pnorm-input-dim 4000 --pnorm-output-dim 400 \
    --cmd "$decode_cmd" \
    data/train_clean_460 data/lang exp/tri5b $dir || exit 1
fi

for test in test_clean test_other dev_clean dev_other; do
  steps/nnet2/decode.sh --nj 20 --cmd "$decode_cmd" \
    --transform-dir exp/tri5b/decode_tgsmall_$test \
    exp/tri5b/graph_tgsmall data/$test $dir/decode_tgsmall_$test || exit 1;
  steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \
    data/$test $dir/decode_{tgsmall,tgmed}_$test || exit 1;
  steps/lmrescore_const_arpa.sh \
    --cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \
    data/$test $dir/decode_{tgsmall,tglarge}_$test || exit 1;
  steps/lmrescore_const_arpa.sh \
    --cmd "$decode_cmd" data/lang_test_{tgsmall,fglarge} \
    data/$test $dir/decode_{tgsmall,fglarge}_$test || exit 1;
done

exit 0;
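A sketch for the 460-hour setup; per the header comment, --use-gpu false is discouraged, so only the GPU path is shown (train_stage is again exposed through utils/parse_options.sh):

# Sketch only: full GPU training on train_clean_460, then decoding and LM rescoring.
local/nnet2/run_6a_clean_460.sh --use-gpu true --train-stage -10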
local/nnet2/run_7a_960.sh  0 → 100755
#!/bin/bash
# This is p-norm neural net training, with the "fast" script, on top of adapted
# 40-dimensional features.
# This version uses 960 hours of mixed (clean + "other") training data.
# We're using 6 jobs rather than 4, for speed, and 5 hidden layers
# rather than 4.
# Note: we highly discourage running this with --use-gpu false, it will
# take way too long.
train_stage=-10
use_gpu=true

. cmd.sh
. ./path.sh
. utils/parse_options.sh

if $use_gpu; then
  if ! cuda-compiled; then
    cat <<EOF && exit 1
This script is intended to be used with GPUs but you have not compiled Kaldi with CUDA
If you want to use GPUs (and have them), go to src/, and configure and make on a machine
where "nvcc" is installed.
EOF
  fi
  parallel_opts="--gpu 1"
  num_threads=1
  minibatch_size=512
  dir=exp/nnet7a_960_gpu
else
  # with just 4 jobs this might be a little slow.
  num_threads=16
  parallel_opts="-pe smp $num_threads"
  minibatch_size=128
  dir=exp/nnet7a_960
fi

. ./cmd.sh
. utils/parse_options.sh

if [ ! -f $dir/final.mdl ]; then
  if [[ $(hostname -f) == *.clsp.jhu.edu ]]; then
    # spread the egs over various machines. will help reduce overload of any
    # one machine.
    utils/create_split_dir.pl /export/b0{1,2,3,4}/$USER/kaldi-data/egs/librispeech/s5/$dir/egs/storage $dir/egs/storage
  fi

  steps/nnet2/train_pnorm_fast.sh --stage $train_stage \
    --samples-per-iter 400000 \
    --num-epochs 6 --num-epochs-extra 2 \
    --parallel-opts "$parallel_opts" \
    --num-threads "$num_threads" \
    --minibatch-size "$minibatch_size" \
    --num-jobs-nnet 6 --mix-up 14000 \
    --initial-learning-rate 0.01 --final-learning-rate 0.001 \
    --num-hidden-layers 5 \
    --pnorm-input-dim 5000 --pnorm-output-dim 500 \
    --cmd "$decode_cmd" \
    data/train_960 data/lang exp/tri6b $dir || exit 1
fi

for test in test_clean test_other dev_clean dev_other; do
  steps/nnet2/decode.sh --nj 20 --cmd "$decode_cmd" \
    --transform-dir exp/tri6b/decode_tgsmall_$test \
    exp/tri6b/graph_tgsmall data/$test $dir/decode_tgsmall_$test || exit 1;
  steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \
    data/$test $dir/decode_{tgsmall,tgmed}_$test || exit 1;
  steps/lmrescore_const_arpa.sh \
    --cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \
    data/$test $dir/decode_{tgsmall,tglarge}_$test || exit 1;
  steps/lmrescore_const_arpa.sh \
    --cmd "$decode_cmd" data/lang_test_{tgsmall,fglarge} \
    data/$test $dir/decode_{tgsmall,fglarge}_$test || exit 1;
done

exit 0;
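Likewise for the 960-hour mixed-data version; the sketch assumes data/train_960 and exp/tri6b already exist from the earlier recipe stages:

# Sketch only: GPU training on the full 960 hours, then decoding and LM rescoring.
local/nnet2/run_7a_960.sh --use-gpu true --train-stage -10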