signal_trimming.py 2.34 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Jan 18 17:32:23 2018

@author: rbaraglia
"""

from pydub import AudioSegment


def detect_leading_silence(sound, silence_threshold=-50.0, chunk_size=100):
    '''
    Return the length, in ms, of the silence at the start of sound.

    sound is a pydub.AudioSegment
    silence_threshold in dB
    chunk_size in ms

    iterate over chunks until you find the first one with sound. The result
    is a multiple of chunk_size, so it can exceed len(sound) when every
    chunk is below the threshold.

    raises ValueError if chunk_size is not positive
    '''
    # Explicit check rather than assert: asserts are removed under
    # `python -O`, and a non-positive chunk_size would then loop forever.
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive number of milliseconds")

    trim_ms = 0 # ms
    duration = len(sound)
    # Test the bound first so no chunk is sliced past the end of the sound.
    while trim_ms < duration and sound[trim_ms:trim_ms+chunk_size].dBFS < silence_threshold:
        trim_ms += chunk_size
    return trim_ms

def average_power_level(sound, chunk_size=100):
    '''
    Return the mean dBFS of the chunks of sound, ignoring silent chunks.

    sound is a pydub.AudioSegment
    chunk_size in ms

    The sound is cut into consecutive chunks starting at 0 ms. Chunks whose
    dBFS is -Inf (digital silence) are left out of the mean. If no chunk has
    a finite level (empty or entirely silent sound), -Inf is returned.

    raises ValueError if chunk_size is not positive
    '''
    # Explicit check rather than assert: asserts are removed under
    # `python -O`, and a non-positive chunk_size would then loop forever.
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive number of milliseconds")

    silent = -float('Inf')
    duration = len(sound)
    start_ms = 0 # ms
    nb_chunk = 0
    total_power = 0.0
    while start_ms < duration:
        # Measure the chunk *before* advancing, so the first chunk is
        # included and nothing is read past the end of the sound.
        level = sound[start_ms:start_ms+chunk_size].dBFS
        if level != silent:
            total_power += level
            nb_chunk += 1
        start_ms += chunk_size

    if nb_chunk == 0:
        # Nothing measurable: report silence instead of dividing by zero.
        return silent
    return total_power/nb_chunk

    '''
    trim_silence_segments removes silence (or background noise) from an audio wav file.
    It works by trimming the signal at the beginning and the end that is below the overall power level.
    input_file is a .wav file path
    output_file is a .wav file path
    chunk_size in ms
    threshold_factor between 0 and 1
    side_effect_accomodation is a number of chunks that will be kept at the beginning and end despite being below the threshold
    
    '''
def trim_silence_segments(input_file,output_file, chunk_size=100, threshold_factor=0.85, side_effect_accomodation=0):
    '''
    Trim leading and trailing silence (or background noise) from a wav file.

    The average power level of the recording is measured chunk by chunk and
    multiplied by threshold_factor to get a threshold; the chunks at the
    beginning and at the end that are below that threshold are cut off and
    the remainder is written to output_file.

    input_file is a .wav file path
    output_file is a .wav file path
    chunk_size in ms
    threshold_factor between 0 and 1
    side_effect_accomodation is a number of chunks that will be kept at the
    beginning and end despite being below the threshold
    '''
    sound = AudioSegment.from_file(input_file, format="wav")
    duration = len(sound)

    # Measure and detect with the caller's chunk_size so that the padding
    # computed below is expressed in the same chunk length.
    silence_threshold = threshold_factor * average_power_level(sound, chunk_size=chunk_size)
    start_trim = detect_leading_silence(sound, silence_threshold=silence_threshold, chunk_size=chunk_size)
    end_trim = detect_leading_silence(sound.reverse(), silence_threshold=silence_threshold, chunk_size=chunk_size)

    # Give back the requested number of chunks on each side, clamped to the
    # bounds of the recording.
    padding = chunk_size * side_effect_accomodation
    start = min(duration, max(0, start_trim - padding))
    end = min(duration, duration - end_trim + padding)
    # When the whole recording is below the threshold the two trims overlap;
    # keep end >= start so the slice is empty instead of wrapping around
    # through a negative index.
    end = max(start, end)

    trimmed_sound = sound[start:end]
    trimmed_sound.export(output_file, format="wav")