Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Open sidebar
Abdelwahab HEBA
kaldi_2015
Commits
ef56de75
Commit
ef56de75
authored
Jul 24, 2015
by
Nagendra Kumar Goel
Browse files
updates for avoiding missed speech
parent
9ec6fcca
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
44 additions
and
14 deletions
+44
-14
egs/wsj/s5/steps/cleanup/create_segments_from_ctm.pl
egs/wsj/s5/steps/cleanup/create_segments_from_ctm.pl
+44
-14
No files found.
egs/wsj/s5/steps/cleanup/create_segments_from_ctm.pl
View file @
ef56de75
...
...
@@ -7,6 +7,8 @@ use strict;
use
warnings
;
use
Getopt::
Long
;
# $SIG{__WARN__} = sub { $DB::single = 1 };
my
$Usage
=
<<'EOU';
This script creates the segments file and text file for a data directory with
new segmentation. It takes a ctm file and an "alignment" file. The ctm file
...
...
@@ -41,9 +43,11 @@ Allowed options:
--wer-cutoff : Ignore segments with WER higher than the specified value.
-1 means no segment will be ignored. (default = -1)
--use-silence-midpoints : Set to 1 if you want to use silence midpoints
instead of min_sil_length for silence overhang.(default 0)
--force-correct-boundary-words : Set to zero if the segments will not be required
to have boundary words to be correct. Default 1
instead of min_sil_length for silence overhang.(default 0)
--force-correct-boundary-words : Set to zero if the segments will not be
required to have boundary words to be correct. Default 1
--aligned-ctm-filename : If set, the intermediate aligned ctm
is saved to this file
EOU
my
$max_seg_length
=
10.0
;
...
...
@@ -54,6 +58,7 @@ my $special_symbol = "<***>";
my
$wer_cutoff
=
-
1
;
my
$use_silence_midpoints
=
0
;
my
$force_correct_boundary_words
=
1
;
my
$aligned_ctm_filename
=
"";
GetOptions
(
'
wer-cutoff=f
'
=>
\
$wer_cutoff
,
'
max-seg-length=f
'
=>
\
$max_seg_length
,
...
...
@@ -61,6 +66,7 @@ GetOptions(
'
min-sil-length=f
'
=>
\
$min_sil_length
,
'
use-silence-midpoints=f
'
=>
\
$use_silence_midpoints
,
'
force-correct-boundary-words=f
'
=>
\
$force_correct_boundary_words
,
'
aligned-ctm-filename=s
'
=>
\
$aligned_ctm_filename
,
'
separator=s
'
=>
\
$separator
,
'
special-symbol=s
'
=>
\
$special_symbol
);
...
...
@@ -74,7 +80,10 @@ open(CI, "<$ctm_in") || die "Error: fail to open $ctm_in\n";
open
(
AI
,
"
<
$align_in
")
||
die
"
Error: fail to open
$align_in
\n
";
open
(
my
$SO
,
"
>
$segments_out
")
||
die
"
Error: fail to open
$segments_out
\n
";
open
(
my
$TO
,
"
>
$text_out
")
||
die
"
Error: fail to open
$text_out
\n
";
my
$ACT
=
undef
;
if
(
$aligned_ctm_filename
ne
"")
{
open
(
$ACT
,
"
>
$aligned_ctm_filename
");
}
# Prints the current segment to file.
sub
PrintSegment
{
my
(
$aligned_ctm
,
$wav_id
,
$min_sil_length
,
...
...
@@ -113,26 +122,37 @@ sub PrintSegment {
# Works out the surrounding silence.
my
$index
=
$seg_start_index
-
1
;
while
(
$index
>=
0
&&
$aligned_ctm
->
[
$index
]
->
[
0
]
eq
"
<eps>
")
{
while
(
$index
>=
0
&&
$aligned_ctm
->
[
$index
]
->
[
0
]
eq
"
<eps>
"
&&
$aligned_ctm
->
[
$index
]
->
[
3
]
==
0
)
{
$index
-=
1
;
}
my
$left_of_segment_has_deletion
=
"
false
";
$left_of_segment_has_deletion
=
"
true
"
if
(
$index
>
0
&&
$aligned_ctm
->
[
$index
-
1
]
->
[
0
]
ne
"
<eps>
"
&&
$aligned_ctm
->
[
$index
-
1
]
->
[
3
]
==
0
);
my
$pad_start_sil
=
(
$aligned_ctm
->
[
$seg_start_index
]
->
[
1
]
-
$aligned_ctm
->
[
$index
+
1
]
->
[
1
])
/
2.0
;
if
(
!
$use_silence_midpoints
)
{
if
(
(
$left_of_segment_has_deletion
eq
"
true
")
||
!
$use_silence_midpoints
)
{
if
(
$pad_start_sil
>
$min_sil_length
/
2.0
)
{
$pad_start_sil
=
$min_sil_length
/
2.0
;
}
}
my
$right_of_segment_has_deletion
=
"
false
";
$index
=
$seg_end_index
+
1
;
while
(
$index
<
scalar
(
@
{
$aligned_ctm
})
&&
$aligned_ctm
->
[
$index
]
->
[
0
]
eq
"
<eps>
")
{
$aligned_ctm
->
[
$index
]
->
[
0
]
eq
"
<eps>
"
&&
$aligned_ctm
->
[
$index
]
->
[
3
]
==
0
)
{
$index
+=
1
;
}
$right_of_segment_has_deletion
=
"
true
"
if
(
$index
<
scalar
(
@
{
$aligned_ctm
})
-
1
&&
$aligned_ctm
->
[
$index
+
1
]
->
[
0
]
ne
"
<eps>
"
&&
$aligned_ctm
->
[
$index
-
1
]
->
[
3
]
>
0
);
my
$pad_end_sil
=
(
$aligned_ctm
->
[
$index
-
1
]
->
[
1
]
+
$aligned_ctm
->
[
$index
-
1
]
->
[
2
]
-
$aligned_ctm
->
[
$seg_end_index
]
->
[
1
]
-
$aligned_ctm
->
[
$seg_end_index
]
->
[
2
])
/
2.0
;
if
(
!
$use_silence_midpoints
)
{
if
(
(
$right_of_segment_has_deletion
eq
"
true
")
||
!
$use_silence_midpoints
)
{
if
(
$pad_end_sil
>
$min_sil_length
/
2.0
)
{
$pad_end_sil
=
$min_sil_length
/
2.0
;
}
...
...
@@ -163,7 +183,7 @@ sub GetSplitPoint {
# Scan in the reversed order so we can maximize the length.
my
$split_point
=
$seg_start_index
;
for
(
my
$x
=
$seg_end_index
;
$x
>
=
$seg_start_index
;
$x
-=
1
)
{
for
(
my
$x
=
$seg_end_index
;
$x
>
$seg_start_index
;
$x
-=
1
)
{
my
$current_seg_length
=
$aligned_ctm
->
[
$x
]
->
[
1
]
+
$aligned_ctm
->
[
$x
]
->
[
2
]
-
$aligned_ctm
->
[
$seg_start_index
]
->
[
1
];
...
...
@@ -243,7 +263,7 @@ sub SplitLongSegment {
# Processes each wav file.
sub
ProcessWav
{
my
(
$max_seg_length
,
$min_seg_length
,
$min_sil_length
,
$special_symbol
,
$current_ctm
,
$current_align
,
$SO
,
$TO
)
=
@_
;
$current_ctm
,
$current_align
,
$SO
,
$TO
,
$ACT
)
=
@_
;
my
$wav_id
=
$current_ctm
->
[
0
]
->
[
0
];
defined
(
$wav_id
)
||
die
"
Error: empty wav section
\n
";
...
...
@@ -298,6 +318,14 @@ sub ProcessWav {
}
}
# Save the aligned CTM if needed
if
(
tell
(
$ACT
)
!=
-
1
){
for
(
my
$i
=
0
;
$i
<=
$#aligned_ctm
;
$i
++
)
{
print
$ACT
"
$aligned_ctm
[
$i
][0]
$aligned_ctm
[
$i
][1]
";
print
$ACT
"
$aligned_ctm
[
$i
][2]
$aligned_ctm
[
$i
][3]
\n
";
}
}
# Second, we create segments from @align_ctm, using simple greedy method.
my
$current_seg_index
=
0
;
my
$current_seg_count
=
0
;
...
...
@@ -306,6 +334,7 @@ sub ProcessWav {
$lcorrect
=
"
false
"
if
(
$x
>
0
&&
$aligned_ctm
[
$x
-
1
]
->
[
3
]
>
0
);
$rcorrect
=
"
false
"
if
(
$x
<
@aligned_ctm
-
1
&&
$aligned_ctm
[
$x
+
1
]
->
[
3
]
>
0
);
my
$current_seg_length
=
GetSegmentLengthNoSil
(
\
@aligned_ctm
,
$current_seg_index
,
$x
);
...
...
@@ -314,8 +343,8 @@ sub ProcessWav {
# that segment contains actual words, instead of pure silence.
if
(
$aligned_ctm
[
$x
]
->
[
0
]
eq
"
<eps>
"
&&
$aligned_ctm
[
$x
]
->
[
2
]
>=
$min_sil_length
&&
((
$force_correct_boundary_words
&&
$lcorrect
eq
"
true
"
&&
$rcorrect
eq
"
true
")
||
!
$force_correct_boundary_words
))
{
&&
((
$force_correct_boundary_words
&&
$lcorrect
eq
"
true
"
&&
$rcorrect
eq
"
true
")
||
!
$force_correct_boundary_words
))
{
if
(
$current_seg_length
<=
$max_seg_length
&&
$current_seg_length
>=
$min_seg_length
)
{
my
$ans
=
PrintSegment
(
\
@aligned_ctm
,
$wav_id
,
$min_sil_length
,
...
...
@@ -403,7 +432,7 @@ while (<CI>) {
InsertSilence
(
\
@current_wav
,
\
@current_wav_silence
);
ProcessWav
(
$max_seg_length
,
$min_seg_length
,
$min_sil_length
,
$special_symbol
,
\
@current_wav_silence
,
$aligned
{
$previous_wav_id
},
$SO
,
$TO
);
$aligned
{
$previous_wav_id
},
$SO
,
$TO
,
$ACT
);
}
@current_wav
=
();
push
(
@current_wav
,
\
@col
);
...
...
@@ -419,10 +448,11 @@ if (@current_wav > 0) {
my
@current_wav_silence
=
();
InsertSilence
(
\
@current_wav
,
\
@current_wav_silence
);
ProcessWav
(
$max_seg_length
,
$min_seg_length
,
$min_sil_length
,
$special_symbol
,
\
@current_wav_silence
,
$aligned
{
$previous_wav_id
},
$SO
,
$TO
);
\
@current_wav_silence
,
$aligned
{
$previous_wav_id
},
$SO
,
$TO
,
$ACT
);
}
close
(
CI
);
close
(
AI
);
close
(
$SO
);
close
(
$TO
);
close
(
$ACT
);
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment