Commit 2ab14876 authored by Rudy BARAGLIA

Added audio file trimming into worker preprocessing

parent 02f81eab
......@@ -45,6 +45,7 @@ class Application(tornado.web.Application):
(r"/worker/ws/speech", WorkerWebSocketHandler)
]
tornado.web.Application.__init__(self, handlers, **settings)
self.connected_worker = 0
self.available_workers = set()
self.waiting_client = set()
self.num_requests_processed = 0
......@@ -61,7 +62,7 @@ class Application(tornado.web.Application):
def display_server_status(self):
logging.info('#'*50)
logging.info("Available workers: %s" % str(len(self.available_workers)))
logging.info("Connected workers: %s (Available: %s)" % (str(self.connected_worker),str(len(self.available_workers))))
logging.info("Waiting clients: %s" % str(len(self.waiting_client)))
logging.info("Requests processed: %s" % str(self.num_requests_processed))
......@@ -158,6 +159,7 @@ class WorkerWebSocketHandler(tornado.websocket.WebSocketHandler):
def open(self):
self.client_handler = None
self.application.available_workers.add(self)
self.application.connected_worker += 1
self.application.check_waiting_clients()
logging.debug("Worker connected")
self.application.display_server_status()
......@@ -169,7 +171,6 @@ class WorkerWebSocketHandler(tornado.websocket.WebSocketHandler):
logging.debug("Message received from worker:" + message)
else:
if 'transcription' in json_msg.keys(): #Receive the file path to process
print(json_msg['transcription'])
logging.debug("Response send by worker : %s" % json.dumps({'transcript':json_msg['transcription'].encode('utf-8')}))
self.client_handler.receive_response(json.dumps({'transcript':json_msg['transcription']}))
self.client_handler = None
......@@ -187,6 +188,7 @@ class WorkerWebSocketHandler(tornado.websocket.WebSocketHandler):
self.client_handler.send_error("Worker closed")
logging.debug("WORKER WebSocket closed")
self.application.available_workers.discard(self)
self.application.connected_worker -= 1
self.application.display_server_status()
def main():
......
ws4py
configparser
tenacity
\ No newline at end of file
tenacity
pydub
\ No newline at end of file
......@@ -31,7 +31,7 @@ sysRootName=$(echo $(basename $sysdir)|cut -f1 -d"=")
# java -Xmx2024m -jar $lvcsrRootDir/tools/lium_spkdiarization-8.4.1.jar \
# --fInputMask=$file --sOutputMask=$datadir/$fileRootName.seg --doCEClustering $fileRootName
duration=`soxi -D $file`
echo "$fileRootName 1 0 $duration M S U S0" | sort -nk3 > $datadir/$fileRootName.seg
# echo "$fileRootName 1 0 $duration M S U S0" | sort -nk3 > $datadir/$fileRootName.seg
fi
if [ $stage -le 2 ]; then
# Generate kaldi input for offline decoding
......@@ -50,7 +50,7 @@ sysRootName=$(echo $(basename $sysdir)|cut -f1 -d"=")
echo "$tag sox $file -t wav -r 16000 -c 1 - |"
done) > $datadir/wav.scp
cat $datadir/wav.scp | awk '{ print $1, $1, "A"; }' > $datadir/reco2file_and_channel
echo validate_data_dir.sh
# echo validate_data_dir.sh
$lvcsrRootDir/scripts/utils/validate_data_dir.sh --no-text --no-feats $datadir
$lvcsrRootDir/scripts/utils/fix_data_dir.sh $datadir
fi
......@@ -67,7 +67,7 @@ sysRootName=$(echo $(basename $sysdir)|cut -f1 -d"=")
if [ $stage -le 4 ]; then
if [ ! -f $transdir/trans.1 ]; then
echo "run fmllr decoding"
# echo "run fmllr decoding"
$lvcsrRootDir/scripts/steps/decode.sh --nj $decode_nj --cmd "$decode_cmd" --num-threads $num_threads --skip-scoring "true" \
$gmmdir/Graph $datadir $transdir || exit 1
fi
......@@ -80,5 +80,5 @@ sysRootName=$(echo $(basename $sysdir)|cut -f1 -d"=")
### Rescoring with LM
### Get CTM and STM files
echo "End...."
#echo "End...."
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Jan 18 17:32:23 2018
@author: rbaraglia
"""
from pydub import AudioSegment
def detect_leading_silence(sound, silence_threshold=-50.0, chunk_size=100):
    """Return the length, in ms, of the quiet lead-in of ``sound``.

    The audio is scanned from the start in steps of ``chunk_size``
    milliseconds until a chunk whose ``dBFS`` level is not below
    ``silence_threshold`` is found.

    Args:
        sound: a ``pydub.AudioSegment`` (any object supporting ``len()``,
            millisecond slicing and a ``dBFS`` attribute on slices works).
        silence_threshold: level in dBFS below which a chunk counts as silent.
        chunk_size: scan step in ms; must be strictly positive.

    Returns:
        The offset in ms of the first non-silent chunk, never larger than
        ``len(sound)``. If every chunk is silent, ``len(sound)`` is returned.

    Raises:
        ValueError: if ``chunk_size`` is not strictly positive.
    """
    # An explicit check rather than ``assert``: assertions are stripped under
    # ``python -O`` and a non-positive step would make the loop never end.
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive number of milliseconds")

    duration = len(sound)
    trim_ms = 0  # ms
    # Test the bound first so that no chunk past the end of the audio is read.
    while trim_ms < duration and sound[trim_ms:trim_ms + chunk_size].dBFS < silence_threshold:
        trim_ms += chunk_size
    # The last step may overshoot when the duration is not a multiple of
    # chunk_size; clamp so callers can safely compute ``duration - result``.
    return min(trim_ms, duration)
def average_power_level(sound, chunk_size=100):
    """Return the mean ``dBFS`` level of ``sound`` over fixed-size chunks.

    The audio is cut into consecutive chunks of ``chunk_size`` milliseconds,
    starting at offset 0. Chunks whose level is ``-inf`` are left out of the
    mean so that they do not drag it down to ``-inf``.

    Args:
        sound: a ``pydub.AudioSegment`` (any object supporting ``len()``,
            millisecond slicing and a ``dBFS`` attribute on slices works).
        chunk_size: chunk length in ms; must be strictly positive.

    Returns:
        The arithmetic mean of the per-chunk dBFS values, or ``-inf`` when
        there is no chunk with a finite level (empty or fully silent audio).

    Raises:
        ValueError: if ``chunk_size`` is not strictly positive.
    """
    # An explicit check rather than ``assert``: assertions are stripped under
    # ``python -O`` and a non-positive step would make the loop never end.
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive number of milliseconds")

    silent = -float('Inf')
    duration = len(sound)
    level_sum = 0.0
    nb_chunk = 0
    position = 0  # ms
    while position < duration:
        # Read the chunk *before* advancing, so the first chunk is included
        # and nothing past the end of the audio is measured.
        level = sound[position:position + chunk_size].dBFS
        if level != silent:
            level_sum += level
            nb_chunk += 1
        position += chunk_size

    if nb_chunk == 0:
        # Nothing measurable: report silence instead of dividing by zero.
        return silent
    return level_sum / nb_chunk
def trim_silence_segments(input_file, output_file, chunk_size=100, threshold_factor=0.85, side_effect_accomodation=0):
    """Remove leading and trailing silence (or background noise) from a wav file.

    The signal is trimmed at the beginning and at the end for as long as its
    level stays below ``threshold_factor`` times the average power level of
    the whole file (both expressed in dBFS).

    Args:
        input_file: path of the .wav file to read.
        output_file: path of the .wav file to write; may be the same path as
            ``input_file``.
        chunk_size: analysis step in ms, used for the average level, for the
            silence detection and as the unit of ``side_effect_accomodation``.
        threshold_factor: multiplier applied to the average power level to
            obtain the silence threshold (intended to be between 0 and 1).
        side_effect_accomodation: number of chunks kept at the beginning and
            at the end despite being below the threshold.
    """
    sound = AudioSegment.from_file(input_file, format="wav")
    duration = len(sound)

    # Use the caller's chunk_size everywhere so the analysis step and the
    # margin unit stay consistent.
    threshold = threshold_factor * average_power_level(sound, chunk_size=chunk_size)
    start_trim = detect_leading_silence(sound, silence_threshold=threshold, chunk_size=chunk_size)
    # Trailing silence is the leading silence of the reversed signal.
    end_trim = detect_leading_silence(sound.reverse(), silence_threshold=threshold, chunk_size=chunk_size)

    # Widen the kept range by the requested margin, clamped to the audio bounds.
    margin = chunk_size * side_effect_accomodation
    start = max(0, start_trim - margin)
    end = min(duration, duration - end_trim + margin)

    if start < end:
        trimmed_sound = sound[start:end]
    else:
        # Everything was classified as silence: keep the audio untouched
        # rather than replacing it with an empty or inverted range.
        trimmed_sound = sound
    trimmed_sound.export(output_file, format="wav")
......@@ -6,4 +6,4 @@ server_target : /worker/ws/speech
[worker_params]
temp_file_location : wavs/
decoder_command : "./scripts/decode.sh systems/models"
preprocessing : false
\ No newline at end of file
preprocessing : true
\ No newline at end of file
......@@ -14,6 +14,7 @@ import subprocess
import configparser
import re
import tenacity
from signal_trimming import *
from ws4py.client.threadedclient import WebSocketClient
......@@ -50,13 +51,14 @@ class WorkerWebSocket(WebSocketClient):
self.client_uuid = json_msg['uuid']
self.fileName = self.client_uuid.replace('-', '')
self.file = json_msg['file'].decode('base64')
with open('./wavs/'+self.fileName+'.wav', 'wb') as f:
self.filepath = TEMP_FILE_PATH+self.fileName+'.wav'
with open(self.filepath, 'wb') as f:
f.write(self.file)
logging.debug("FileName received: %s" % self.fileName)
# TODO: preprocessing ? (sox python)
if PREPROCESSING:
pass
logging.debug("Trimming signal")
trim_silence_segments(self.filepath,self.filepath, chunk_size=100, threshold_factor=0.85, side_effect_accomodation=0)
# Offline decoder call
logging.debug(DECODER_COMMAND + ' ' + TEMP_FILE_PATH + self.fileName+'.wav')
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment