Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Open sidebar
LINAGORA
L
LGS
Labs
linstt-offline-decoding
Commits
2ab14876
Commit
2ab14876
authored
Jan 19, 2018
by
Rudy BARAGLIA
Browse files
Added audio file trimming into worker preprocessing
parent
02f81eab
Changes
8
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
77 additions
and
11 deletions
+77
-11
modules/server/master_server.py
modules/server/master_server.py
+4
-2
modules/server/temp_files/415fef03-2443-4531-bcea-0ae5c30a5297.wav
...erver/temp_files/415fef03-2443-4531-bcea-0ae5c30a5297.wav
+0
-0
modules/server/temp_files/53765235-69ed-41ce-a1f2-2f160388b8ad.wav
...erver/temp_files/53765235-69ed-41ce-a1f2-2f160388b8ad.wav
+0
-0
modules/worker_offline/requirements.txt
modules/worker_offline/requirements.txt
+2
-1
modules/worker_offline/scripts/decode.sh
modules/worker_offline/scripts/decode.sh
+4
-4
modules/worker_offline/signal_trimming.py
modules/worker_offline/signal_trimming.py
+61
-0
modules/worker_offline/worker.cfg
modules/worker_offline/worker.cfg
+1
-1
modules/worker_offline/worker_offline.py
modules/worker_offline/worker_offline.py
+5
-3
No files found.
modules/server/master_server.py
View file @
2ab14876
...
...
@@ -45,6 +45,7 @@ class Application(tornado.web.Application):
(
r
"/worker/ws/speech"
,
WorkerWebSocketHandler
)
]
tornado
.
web
.
Application
.
__init__
(
self
,
handlers
,
**
settings
)
self
.
connected_worker
=
0
self
.
available_workers
=
set
()
self
.
waiting_client
=
set
()
self
.
num_requests_processed
=
0
...
...
@@ -61,7 +62,7 @@ class Application(tornado.web.Application):
def
display_server_status
(
self
):
logging
.
info
(
'#'
*
50
)
logging
.
info
(
"Available
workers
: %s"
%
str
(
len
(
self
.
available_workers
)))
logging
.
info
(
"
Connected workers: %s (
Available: %s
)
"
%
(
str
(
self
.
connected_worker
),
str
(
len
(
self
.
available_workers
)))
)
logging
.
info
(
"Waiting clients: %s"
%
str
(
len
(
self
.
waiting_client
)))
logging
.
info
(
"Requests processed: %s"
%
str
(
self
.
num_requests_processed
))
...
...
@@ -158,6 +159,7 @@ class WorkerWebSocketHandler(tornado.websocket.WebSocketHandler):
def
open
(
self
):
self
.
client_handler
=
None
self
.
application
.
available_workers
.
add
(
self
)
self
.
application
.
connected_worker
+=
1
self
.
application
.
check_waiting_clients
()
logging
.
debug
(
"Worker connected"
)
self
.
application
.
display_server_status
()
...
...
@@ -169,7 +171,6 @@ class WorkerWebSocketHandler(tornado.websocket.WebSocketHandler):
logging
.
debug
(
"Message received from worker:"
+
message
)
else
:
if
'transcription'
in
json_msg
.
keys
():
#Receive the file path to process
print
(
json_msg
[
'transcription'
])
logging
.
debug
(
"Response send by worker : %s"
%
json
.
dumps
({
'transcript'
:
json_msg
[
'transcription'
].
encode
(
'utf-8'
)}))
self
.
client_handler
.
receive_response
(
json
.
dumps
({
'transcript'
:
json_msg
[
'transcription'
]}))
self
.
client_handler
=
None
...
...
@@ -187,6 +188,7 @@ class WorkerWebSocketHandler(tornado.websocket.WebSocketHandler):
self
.
client_handler
.
send_error
(
"Worker closed"
)
logging
.
debug
(
"WORKER WebSocket closed"
)
self
.
application
.
available_workers
.
discard
(
self
)
self
.
application
.
connected_worker
-=
1
self
.
application
.
display_server_status
()
def
main
():
...
...
modules/server/temp_files/415fef03-2443-4531-bcea-0ae5c30a5297.wav
0 → 100644
View file @
2ab14876
File added
modules/server/temp_files/53765235-69ed-41ce-a1f2-2f160388b8ad.wav
0 → 100644
View file @
2ab14876
File added
modules/worker_offline/requirements.txt
View file @
2ab14876
ws4py
configparser
tenacity
\ No newline at end of file
tenacity
pydub
\ No newline at end of file
modules/worker_offline/scripts/decode.sh
View file @
2ab14876
...
...
@@ -31,7 +31,7 @@ sysRootName=$(echo $(basename $sysdir)|cut -f1 -d"=")
# java -Xmx2024m -jar $lvcsrRootDir/tools/lium_spkdiarization-8.4.1.jar \
# --fInputMask=$file --sOutputMask=$datadir/$fileRootName.seg --doCEClustering $fileRootName
duration
=
`
soxi
-D
$file
`
echo
"
$fileRootName
1 0
$duration
M S U S0"
|
sort
-nk3
>
$datadir
/
$fileRootName
.seg
#
echo "$fileRootName 1 0 $duration M S U S0" | sort -nk3 > $datadir/$fileRootName.seg
fi
if
[
$stage
-le
2
]
;
then
# Generate kaldi input for offline decoding
...
...
@@ -50,7 +50,7 @@ sysRootName=$(echo $(basename $sysdir)|cut -f1 -d"=")
echo
"
$tag
sox
$file
-t wav -r 16000 -c 1 - |"
done
)
>
$datadir
/wav.scp
cat
$datadir
/wav.scp |
awk
'{ print $1, $1, "A"; }'
>
$datadir
/reco2file_and_channel
echo
validate_data_dir.sh
#
echo validate_data_dir.sh
$lvcsrRootDir
/scripts/utils/validate_data_dir.sh
--no-text
--no-feats
$datadir
$lvcsrRootDir
/scripts/utils/fix_data_dir.sh
$datadir
fi
...
...
@@ -67,7 +67,7 @@ sysRootName=$(echo $(basename $sysdir)|cut -f1 -d"=")
if
[
$stage
-le
4
]
;
then
if
[
!
-f
$transdir
/trans.1
]
;
then
echo
"run fmllr decoding"
#
echo "run fmllr decoding"
$lvcsrRootDir
/scripts/steps/decode.sh
--nj
$decode_nj
--cmd
"
$decode_cmd
"
--num-threads
$num_threads
--skip-scoring
"true"
\
$gmmdir
/Graph
$datadir
$transdir
||
exit
1
fi
...
...
@@ -80,5 +80,5 @@ sysRootName=$(echo $(basename $sysdir)|cut -f1 -d"=")
### Rescoring with LM
### Get CTM and STM files
echo
"End...."
#
echo "End...."
modules/worker_offline/signal_trimming.py
0 → 100644
View file @
2ab14876
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Jan 18 17:32:23 2018
@author: rbaraglia
"""
from
pydub
import
AudioSegment
def detect_leading_silence(sound, silence_threshold=-50.0, chunk_size=100):
    '''
    Return the duration, in ms, of the silence at the beginning of ``sound``.

    sound is a pydub.AudioSegment (the code only needs len(), slicing by
        milliseconds and a ``dBFS`` attribute on the slices)
    silence_threshold in dB: a chunk strictly quieter than this is silent
    chunk_size in ms: step of the scan, must be > 0

    Iterates over chunks until the first one with sound is found. The result
    is a multiple of chunk_size, capped at len(sound) when every chunk is
    below the threshold.

    Raises ValueError if chunk_size is not strictly positive.
    '''
    # Explicit check rather than ``assert``: asserts are stripped under
    # ``python -O`` and a non-positive chunk_size would then loop forever.
    if chunk_size <= 0:
        raise ValueError("chunk_size must be > 0")

    duration = len(sound)
    trim_ms = 0  # ms
    # Test the bound first so that no chunk past the end is ever measured.
    while (trim_ms < duration
           and sound[trim_ms:trim_ms + chunk_size].dBFS < silence_threshold):
        trim_ms += chunk_size

    # The last step may overshoot when len(sound) is not a multiple of
    # chunk_size; never report more silence than there is audio.
    return min(trim_ms, duration)
def average_power_level(sound, chunk_size=100):
    '''
    Return the average level of ``sound`` in dBFS, measured chunk by chunk.

    sound is a pydub.AudioSegment (the code only needs len(), slicing by
        milliseconds and a ``dBFS`` attribute on the slices)
    chunk_size in ms, must be > 0

    The result is the arithmetic mean of the per-chunk dBFS values. Chunks
    whose level is -inf are left out of the mean. When no chunk has a
    measurable level (empty sound, or only -inf chunks) -inf is returned
    instead of dividing by zero.

    Raises ValueError if chunk_size is not strictly positive.
    '''
    # Explicit check rather than ``assert``: asserts are stripped under
    # ``python -O`` and a non-positive chunk_size would then loop forever.
    if chunk_size <= 0:
        raise ValueError("chunk_size must be > 0")

    silent = -float('Inf')
    duration = len(sound)
    total_power = 0.0
    nb_chunk = 0
    start_ms = 0  # ms
    while start_ms < duration:
        # Measure the chunk BEFORE advancing, so the first chunk is counted
        # and no empty chunk past the end is read.
        level = sound[start_ms:start_ms + chunk_size].dBFS
        if level != silent:
            total_power += level
            nb_chunk += 1
        start_ms += chunk_size

    if nb_chunk == 0:
        return silent
    return total_power / nb_chunk
def trim_silence_segments(input_file, output_file, chunk_size=100, threshold_factor=0.85, side_effect_accomodation=0):
    '''
    Remove silence (or background noise) from both ends of a wav file.

    It works by trimming the signal at the beginning and at the end for as
    long as its level stays below a threshold derived from the overall
    average level of the file.

    input_file is a .wav file path
    output_file is a .wav file path (may be the same as input_file)
    chunk_size in ms, used both to measure the average level and to scan
        for silence
    threshold_factor between 0 and 1; the silence threshold is
        threshold_factor * average level.
        NOTE(review): dBFS levels are normally negative, so a factor below 1
        puts the threshold ABOVE the average level - confirm this is intended.
    side_effect_accomodation is a number of chunks that will be kept at the
        beginning and at the end despite being below the threshold

    If the whole file is below the threshold, nothing is trimmed and the
    audio is exported unchanged rather than writing an empty file.
    '''
    sound = AudioSegment.from_file(input_file, format="wav")
    duration = len(sound)

    # Use the caller's chunk_size for every measurement so the threshold and
    # the scan work at the same granularity.
    avg_power = average_power_level(sound, chunk_size=chunk_size)
    silence_threshold = threshold_factor * avg_power

    start_trim = detect_leading_silence(sound, silence_threshold=silence_threshold, chunk_size=chunk_size)
    # Trailing silence is the leading silence of the reversed signal.
    end_trim = detect_leading_silence(sound.reverse(), silence_threshold=silence_threshold, chunk_size=chunk_size)

    # Give back `side_effect_accomodation` chunks on each side, without ever
    # stepping outside the original signal.
    margin = chunk_size * side_effect_accomodation
    start = max(0, start_trim - margin)
    end = min(duration, duration - end_trim + margin)

    if start < end:
        trimmed_sound = sound[start:end]
    else:
        # Everything was classified as silence: keep the audio as it is
        # instead of exporting an empty (or inverted) slice.
        trimmed_sound = sound

    trimmed_sound.export(output_file, format="wav")
modules/worker_offline/worker.cfg
View file @
2ab14876
...
...
@@ -6,4 +6,4 @@ server_target : /worker/ws/speech
[worker_params]
temp_file_location : wavs/
decoder_command : "./scripts/decode.sh systems/models"
preprocessing : false
\ No newline at end of file
preprocessing : true
\ No newline at end of file
modules/worker_offline/worker_offline.py
View file @
2ab14876
...
...
@@ -14,6 +14,7 @@ import subprocess
import
configparser
import
re
import
tenacity
from
signal_trimming
import
*
from
ws4py.client.threadedclient
import
WebSocketClient
...
...
@@ -50,13 +51,14 @@ class WorkerWebSocket(WebSocketClient):
self
.
client_uuid
=
json_msg
[
'uuid'
]
self
.
fileName
=
self
.
client_uuid
.
replace
(
'-'
,
''
)
self
.
file
=
json_msg
[
'file'
].
decode
(
'base64'
)
with
open
(
'./wavs/'
+
self
.
fileName
+
'.wav'
,
'wb'
)
as
f
:
self
.
filepath
=
TEMP_FILE_PATH
+
self
.
fileName
+
'.wav'
with
open
(
self
.
filepath
,
'wb'
)
as
f
:
f
.
write
(
self
.
file
)
logging
.
debug
(
"FileName received: %s"
%
self
.
fileName
)
# TODO: preprocessing ? (sox python)
if
PREPROCESSING
:
pass
logging
.
debug
(
"Trimming signal"
)
trim_silence_segments
(
self
.
filepath
,
self
.
filepath
,
chunk_size
=
100
,
threshold_factor
=
0.85
,
side_effect_accomodation
=
0
)
# Offline decoder call
logging
.
debug
(
DECODER_COMMAND
+
' '
+
TEMP_FILE_PATH
+
self
.
fileName
+
'.wav'
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment