diff --git a/modules/server/master_server.py b/modules/server/master_server.py index e6680d0fe0dcb8b12d0e17cc081857441fc6cf4b..cd54c73e33db24807ad7e651f68d4ace4078f4d6 100755 --- a/modules/server/master_server.py +++ b/modules/server/master_server.py @@ -45,6 +45,7 @@ class Application(tornado.web.Application): (r"/worker/ws/speech", WorkerWebSocketHandler) ] tornado.web.Application.__init__(self, handlers, **settings) + self.connected_worker = 0 self.available_workers = set() self.waiting_client = set() self.num_requests_processed = 0 @@ -61,7 +62,7 @@ class Application(tornado.web.Application): def display_server_status(self): logging.info('#'*50) - logging.info("Available workers: %s" % str(len(self.available_workers))) + logging.info("Connected workers: %s (Available: %s)" % (str(self.connected_worker),str(len(self.available_workers)))) logging.info("Waiting clients: %s" % str(len(self.waiting_client))) logging.info("Requests processed: %s" % str(self.num_requests_processed)) @@ -158,6 +159,7 @@ class WorkerWebSocketHandler(tornado.websocket.WebSocketHandler): def open(self): self.client_handler = None self.application.available_workers.add(self) + self.application.connected_worker += 1 self.application.check_waiting_clients() logging.debug("Worker connected") self.application.display_server_status() @@ -169,7 +171,6 @@ class WorkerWebSocketHandler(tornado.websocket.WebSocketHandler): logging.debug("Message received from worker:" + message) else: if 'transcription' in json_msg.keys(): #Receive the file path to process - print(json_msg['transcription']) logging.debug("Response send by worker : %s" % json.dumps({'transcript':json_msg['transcription'].encode('utf-8')})) self.client_handler.receive_response(json.dumps({'transcript':json_msg['transcription']})) self.client_handler = None @@ -187,6 +188,7 @@ class WorkerWebSocketHandler(tornado.websocket.WebSocketHandler): self.client_handler.send_error("Worker closed") logging.debug("WORKER WebSocket closed") 
self.application.available_workers.discard(self) + self.application.connected_worker -= 1 self.application.display_server_status() def main(): diff --git a/modules/server/temp_files/415fef03-2443-4531-bcea-0ae5c30a5297.wav b/modules/server/temp_files/415fef03-2443-4531-bcea-0ae5c30a5297.wav new file mode 100644 index 0000000000000000000000000000000000000000..1bb9ef286ff02c876d2a4fbbac798b32778af0e8 Binary files /dev/null and b/modules/server/temp_files/415fef03-2443-4531-bcea-0ae5c30a5297.wav differ diff --git a/modules/server/temp_files/53765235-69ed-41ce-a1f2-2f160388b8ad.wav b/modules/server/temp_files/53765235-69ed-41ce-a1f2-2f160388b8ad.wav new file mode 100644 index 0000000000000000000000000000000000000000..9e5e34a392c91bf1e369616dbb523c272b87494e Binary files /dev/null and b/modules/server/temp_files/53765235-69ed-41ce-a1f2-2f160388b8ad.wav differ diff --git a/modules/worker_offline/requirements.txt b/modules/worker_offline/requirements.txt index b67fdc8f277afc84c2e077a98608583391fc8262..74044b57b240e5bc0536872fac4268ca44375760 100644 --- a/modules/worker_offline/requirements.txt +++ b/modules/worker_offline/requirements.txt @@ -1,3 +1,4 @@ ws4py configparser -tenacity \ No newline at end of file +tenacity +pydub \ No newline at end of file diff --git a/modules/worker_offline/scripts/decode.sh b/modules/worker_offline/scripts/decode.sh index c616c66275217f5f10455ac6e31f79298514f1c0..2dd941107331555eb4a670d87f3799d3e3f35a3b 100755 --- a/modules/worker_offline/scripts/decode.sh +++ b/modules/worker_offline/scripts/decode.sh @@ -31,7 +31,7 @@ sysRootName=$(echo $(basename $sysdir)|cut -f1 -d"=") # java -Xmx2024m -jar $lvcsrRootDir/tools/lium_spkdiarization-8.4.1.jar \ # --fInputMask=$file --sOutputMask=$datadir/$fileRootName.seg --doCEClustering $fileRootName duration=`soxi -D $file` - echo "$fileRootName 1 0 $duration M S U S0" | sort -nk3 > $datadir/$fileRootName.seg +# echo "$fileRootName 1 0 $duration M S U S0" | sort -nk3 > $datadir/$fileRootName.seg fi if 
[ $stage -le 2 ]; then # Generate kaldi input for offline decoding @@ -50,7 +50,7 @@ sysRootName=$(echo $(basename $sysdir)|cut -f1 -d"=") echo "$tag sox $file -t wav -r 16000 -c 1 - |" done) > $datadir/wav.scp cat $datadir/wav.scp | awk '{ print $1, $1, "A"; }' > $datadir/reco2file_and_channel - echo validate_data_dir.sh +# echo validate_data_dir.sh $lvcsrRootDir/scripts/utils/validate_data_dir.sh --no-text --no-feats $datadir $lvcsrRootDir/scripts/utils/fix_data_dir.sh $datadir fi @@ -67,7 +67,7 @@ sysRootName=$(echo $(basename $sysdir)|cut -f1 -d"=") if [ $stage -le 4 ]; then if [ ! -f $transdir/trans.1 ]; then - echo "run fmllr decoding" + # echo "run fmllr decoding" $lvcsrRootDir/scripts/steps/decode.sh --nj $decode_nj --cmd "$decode_cmd" --num-threads $num_threads --skip-scoring "true" \ $gmmdir/Graph $datadir $transdir || exit 1 fi @@ -80,5 +80,5 @@ sysRootName=$(echo $(basename $sysdir)|cut -f1 -d"=") ### Rescoring with LM ### Get CTM and STM files -echo "End...." +#echo "End...." 
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Silence trimming helpers for WAV files, built on pydub.

Created on Thu Jan 18 17:32:23 2018

@author: rbaraglia
"""

try:
    from pydub import AudioSegment
except ImportError:
    # Keep the helper functions importable when pydub is missing;
    # trim_silence_segments raises a clear error when it needs the library.
    AudioSegment = None


def detect_leading_silence(sound, silence_threshold=-50.0, chunk_size=100):
    '''
    Return how many milliseconds of leading "silence" sound starts with.

    sound is a pydub.AudioSegment (any object supporting len(), millisecond
    slicing and a dBFS attribute on the slices works)
    silence_threshold in dB
    chunk_size in ms

    Chunks are scanned from the start until the first one whose dBFS is not
    below silence_threshold. The result is a multiple of chunk_size, clamped
    to len(sound) so that a fully silent clip never yields an offset past
    its end.

    Raises ValueError if chunk_size is not positive (the scan would never
    advance).
    '''
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive number of milliseconds")

    total_ms = len(sound)
    trim_ms = 0  # ms
    # Test the bound first so an empty slice past the end is never measured.
    while (trim_ms < total_ms
           and sound[trim_ms:trim_ms + chunk_size].dBFS < silence_threshold):
        trim_ms += chunk_size
    return min(trim_ms, total_ms)


def average_power_level(sound, chunk_size=100):
    '''
    Return the mean of the per-chunk dBFS values of sound.

    sound is a pydub.AudioSegment
    chunk_size in ms

    Chunks whose level is -inf (digital silence) are left out of the mean.
    When no chunk has a finite level (empty or fully silent clip) the result
    is -inf instead of a ZeroDivisionError.

    Raises ValueError if chunk_size is not positive.
    '''
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive number of milliseconds")

    silent = -float('Inf')
    total_ms = len(sound)
    levels = []
    position = 0  # ms
    while position < total_ms:
        # Measure the chunk *before* advancing so the first chunk is counted
        # and no chunk beyond the end is read.
        level = sound[position:position + chunk_size].dBFS
        if level != silent:
            levels.append(level)
        position += chunk_size

    if not levels:
        return silent
    return sum(levels) / len(levels)


def trim_silence_segments(input_file, output_file, chunk_size=100,
                          threshold_factor=0.85, side_effect_accomodation=0):
    '''
    trim_silence_segments removes silence (or background noise) from an audio
    wav file.

    It works by trimming the signal at the beginning and at the end that is
    below the overall power level.

    input_file is a .wav file path
    output_file is a .wav file path (may be the same as input_file)
    chunk_size in ms
    threshold_factor between 0 and 1; the silence threshold is
        threshold_factor * average_power_level(sound). For a negative average
        level a factor below 1 puts the threshold above the average.
    side_effect_accomodation is a number of chunks that will be kept at the
        beginning and end despite being below the threshold

    If trimming would remove the whole signal, the audio is exported
    untrimmed rather than as an empty file.

    Raises ImportError if pydub is not installed, ValueError if chunk_size is
    not positive.
    '''
    if AudioSegment is None:
        raise ImportError("pydub is required to trim audio files")

    sound = AudioSegment.from_file(input_file, format="wav")
    threshold = threshold_factor * average_power_level(sound, chunk_size)

    start_trim = detect_leading_silence(
        sound, silence_threshold=threshold, chunk_size=chunk_size)
    # Trailing silence is the leading silence of the reversed signal.
    end_trim = detect_leading_silence(
        sound.reverse(), silence_threshold=threshold, chunk_size=chunk_size)

    duration = len(sound)
    padding = chunk_size * side_effect_accomodation
    # Clamp the padded bounds to the clip instead of dropping the padding.
    start = max(0, start_trim - padding)
    end = min(duration, duration - end_trim + padding)
    if start >= end:
        # Every chunk was below the threshold: keep the original audio.
        start, end = 0, duration

    sound[start:end].export(output_file, format="wav")
a/modules/worker_offline/worker_offline.py +++ b/modules/worker_offline/worker_offline.py @@ -14,6 +14,7 @@ import subprocess import configparser import re import tenacity +from signal_trimming import * from ws4py.client.threadedclient import WebSocketClient @@ -50,13 +51,14 @@ class WorkerWebSocket(WebSocketClient): self.client_uuid = json_msg['uuid'] self.fileName = self.client_uuid.replace('-', '') self.file = json_msg['file'].decode('base64') - - with open('./wavs/'+self.fileName+'.wav', 'wb') as f: + self.filepath = TEMP_FILE_PATH+self.fileName+'.wav' + with open(self.filepath, 'wb') as f: f.write(self.file) logging.debug("FileName received: %s" % self.fileName) # TODO: preprocessing ? (sox python) if PREPROCESSING: - pass + logging.debug("Trimming signal") + trim_silence_segments(self.filepath,self.filepath, chunk_size=100, threshold_factor=0.85, side_effect_accomodation=0) # Offline decoder call logging.debug(DECODER_COMMAND + ' ' + TEMP_FILE_PATH + self.fileName+'.wav')