Commit ef56de75 authored by Nagendra Kumar Goel's avatar Nagendra Kumar Goel

updates for avoiding missed speech

parent 9ec6fcca
......@@ -7,6 +7,8 @@ use strict;
use warnings;
use Getopt::Long;
# $SIG{__WARN__} = sub { $DB::single = 1 };
my $Usage = <<'EOU';
This script creates the segments file and text file for a data directory with
new segmentation. It takes a ctm file and an "alignment" file. The ctm file
......@@ -41,9 +43,11 @@ Allowed options:
--wer-cutoff : Ignore segments with WER higher than the specified value.
-1 means no segment will be ignored. (default = -1)
--use-silence-midpoints : Set to 1 if you want to use silence midpoints
instead of min_sil_length for silence overhang.(default 0)
--force-correct-boundary-words : Set to zero if the segments will not be required
to have boundary words to be correct. Default 1
instead of min_sil_length for silence overhang.(default 0)
--force-correct-boundary-words : Set to zero if the segments will not be
required to have boundary words to be correct. Default 1
--aligned-ctm-filename : If set, the intermediate aligned ctm
is saved to this file
EOU
my $max_seg_length = 10.0;
......@@ -54,6 +58,7 @@ my $special_symbol = "<***>";
my $wer_cutoff = -1;
my $use_silence_midpoints = 0;
my $force_correct_boundary_words = 1;
my $aligned_ctm_filename = "";
GetOptions(
'wer-cutoff=f' => \$wer_cutoff,
'max-seg-length=f' => \$max_seg_length,
......@@ -61,6 +66,7 @@ GetOptions(
'min-sil-length=f' => \$min_sil_length,
'use-silence-midpoints=f' => \$use_silence_midpoints,
'force-correct-boundary-words=f' => \$force_correct_boundary_words,
'aligned-ctm-filename=s' => \$aligned_ctm_filename,
'separator=s' => \$separator,
'special-symbol=s' => \$special_symbol);
......@@ -74,7 +80,10 @@ open(CI, "<$ctm_in") || die "Error: fail to open $ctm_in\n";
open(AI, "<$align_in") || die "Error: fail to open $align_in\n";
open(my $SO, ">$segments_out") || die "Error: fail to open $segments_out\n";
open(my $TO, ">$text_out") || die "Error: fail to open $text_out\n";
my $ACT= undef;
if ($aligned_ctm_filename ne "") {
open($ACT, ">$aligned_ctm_filename");
}
# Prints the current segment to file.
sub PrintSegment {
my ($aligned_ctm, $wav_id, $min_sil_length,
......@@ -113,26 +122,37 @@ sub PrintSegment {
# Works out the surrounding silence.
my $index = $seg_start_index - 1;
while ($index >= 0 && $aligned_ctm->[$index]->[0] eq "<eps>") {
while ($index >= 0 && $aligned_ctm->[$index]->[0] eq
"<eps>" && $aligned_ctm->[$index]->[3] == 0) {
$index -= 1;
}
my $left_of_segment_has_deletion = "false";
$left_of_segment_has_deletion = "true"
if ($index > 0 && $aligned_ctm->[$index-1]->[0] ne "<eps>"
&& $aligned_ctm->[$index-1]->[3] == 0);
my $pad_start_sil = ($aligned_ctm->[$seg_start_index]->[1] -
$aligned_ctm->[$index + 1]->[1]) / 2.0;
if (!$use_silence_midpoints) {
if (($left_of_segment_has_deletion eq "true") || !$use_silence_midpoints) {
if ($pad_start_sil > $min_sil_length / 2.0) {
$pad_start_sil = $min_sil_length / 2.0;
}
}
my $right_of_segment_has_deletion = "false";
$index = $seg_end_index + 1;
while ($index < scalar(@{$aligned_ctm}) &&
$aligned_ctm->[$index]->[0] eq "<eps>") {
$aligned_ctm->[$index]->[0] eq "<eps>" &&
$aligned_ctm->[$index]->[3] == 0) {
$index += 1;
}
$right_of_segment_has_deletion = "true"
if ($index < scalar(@{$aligned_ctm})-1 && $aligned_ctm->[$index+1]->[0] ne
"<eps>" && $aligned_ctm->[$index - 1]->[3] > 0);
my $pad_end_sil = ($aligned_ctm->[$index - 1]->[1] +
$aligned_ctm->[$index - 1]->[2] -
$aligned_ctm->[$seg_end_index]->[1] -
$aligned_ctm->[$seg_end_index]->[2]) / 2.0;
if (!$use_silence_midpoints) {
if (($right_of_segment_has_deletion eq "true") || !$use_silence_midpoints) {
if ($pad_end_sil > $min_sil_length / 2.0) {
$pad_end_sil = $min_sil_length / 2.0;
}
......@@ -163,7 +183,7 @@ sub GetSplitPoint {
# Scan in the reversed order so we can maximize the length.
my $split_point = $seg_start_index;
for (my $x = $seg_end_index; $x >= $seg_start_index; $x -= 1) {
for (my $x = $seg_end_index; $x > $seg_start_index; $x -= 1) {
my $current_seg_length = $aligned_ctm->[$x]->[1] +
$aligned_ctm->[$x]->[2] -
$aligned_ctm->[$seg_start_index]->[1];
......@@ -243,7 +263,7 @@ sub SplitLongSegment {
# Processes each wav file.
sub ProcessWav {
my ($max_seg_length, $min_seg_length, $min_sil_length, $special_symbol,
$current_ctm, $current_align, $SO, $TO) = @_;
$current_ctm, $current_align, $SO, $TO, $ACT) = @_;
my $wav_id = $current_ctm->[0]->[0];
defined($wav_id) || die "Error: empty wav section\n";
......@@ -298,6 +318,14 @@ sub ProcessWav {
}
}
# Save the aligned CTM if needed
if(tell($ACT) != -1){
for (my $i=0; $i<=$#aligned_ctm; $i++) {
print $ACT "$aligned_ctm[$i][0] $aligned_ctm[$i][1] ";
print $ACT "$aligned_ctm[$i][2] $aligned_ctm[$i][3]\n";
}
}
# Second, we create segments from @align_ctm, using simple greedy method.
my $current_seg_index = 0;
my $current_seg_count = 0;
......@@ -306,6 +334,7 @@ sub ProcessWav {
$lcorrect = "false" if ($x > 0 && $aligned_ctm[$x - 1]->[3] > 0);
$rcorrect = "false" if ($x < @aligned_ctm - 1 &&
$aligned_ctm[$x + 1]->[3] > 0);
my $current_seg_length = GetSegmentLengthNoSil(\@aligned_ctm,
$current_seg_index, $x);
......@@ -314,8 +343,8 @@ sub ProcessWav {
# that segment contains actual words, instead of pure silence.
if ($aligned_ctm[$x]->[0] eq "<eps>" &&
$aligned_ctm[$x]->[2] >= $min_sil_length
&& (($force_correct_boundary_words && $lcorrect eq "true" && $rcorrect eq "true")
|| !$force_correct_boundary_words)) {
&& (($force_correct_boundary_words && $lcorrect eq "true" &&
$rcorrect eq "true") || !$force_correct_boundary_words)) {
if ($current_seg_length <= $max_seg_length &&
$current_seg_length >= $min_seg_length) {
my $ans = PrintSegment(\@aligned_ctm, $wav_id, $min_sil_length,
......@@ -403,7 +432,7 @@ while (<CI>) {
InsertSilence(\@current_wav, \@current_wav_silence);
ProcessWav($max_seg_length, $min_seg_length, $min_sil_length,
$special_symbol, \@current_wav_silence,
$aligned{$previous_wav_id}, $SO, $TO);
$aligned{$previous_wav_id}, $SO, $TO, $ACT);
}
@current_wav = ();
push(@current_wav, \@col);
......@@ -419,10 +448,11 @@ if (@current_wav > 0) {
my @current_wav_silence = ();
InsertSilence(\@current_wav, \@current_wav_silence);
ProcessWav($max_seg_length, $min_seg_length, $min_sil_length, $special_symbol,
\@current_wav_silence, $aligned{$previous_wav_id}, $SO, $TO);
\@current_wav_silence, $aligned{$previous_wav_id}, $SO, $TO, $ACT);
}
close(CI);
close(AI);
close($SO);
close($TO);
close($ACT);
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment