#! /usr/bin/python
"""dialectsnipper: cut the opening section off every MP3 under a directory.

Usage: dialectsnipper.py DIRECTORY

Every ``*.mp3`` found recursively under DIRECTORY is scanned for silences.
The first silence that is long enough and lies inside the expected time
window is treated as "the break": everything before it is dropped
(presumably an introduction -- confirm against the recordings) and the
remainder is faded in.  Results are written to a ``strippedAccents``
directory that sits next to DIRECTORY and mirrors its sub-directory layout.
Files in which no break is found are reported on stdout and exported
unmodified.
"""

import os
import sys
from pathlib import Path

from pydub import AudioSegment, silence

# Name of the output tree, created as a sibling of the input directory.
OUTPUT_DIR_NAME = 'strippedAccents'

# Arguments for pydub.silence.detect_silence.
MIN_SILENCE_LEN_MS = 500    # shortest stretch reported as a silence
SILENCE_THRESH_DBFS = -35   # anything quieter than this counts as silent
SEEK_STEP_MS = 50           # scan granularity

# Criteria a detected silence must meet to be accepted as the break.
MIN_BREAK_LEN_MS = 600      # the silence must last at least this long
BREAK_START_AFTER_MS = 5000   # ... begin strictly after this offset
BREAK_END_BEFORE_MS = 15000   # ... and end strictly before this offset

# How the kept portion of the audio is shaped.
KEEP_SILENCE_MS = 500       # trailing part of the break kept before speech
FADE_IN_MS = 400            # fade-in applied to the kept audio


def _find_cut_ms(silences):
    """Return the end time (ms) of the first qualifying silence, or None.

    *silences* is the list of ``[start_ms, end_ms]`` pairs produced by
    ``silence.detect_silence``.
    """
    for start_ms, end_ms in silences:
        if (end_ms - start_ms >= MIN_BREAK_LEN_MS
                and start_ms > BREAK_START_AFTER_MS
                and end_ms < BREAK_END_BEFORE_MS):
            return end_ms
    return None


def _strip_file(source: Path, destination: Path) -> None:
    """Export *source* to *destination* with the opening section removed.

    If no break is found, a message is printed and the audio is exported
    unchanged so the output tree stays complete.
    """
    audio = AudioSegment.from_mp3(str(source))
    silences = silence.detect_silence(
        audio, MIN_SILENCE_LEN_MS, SILENCE_THRESH_DBFS, SEEK_STEP_MS)

    # exist_ok tolerates directories created for earlier files; real
    # failures (e.g. permissions) are no longer silently swallowed.
    os.makedirs(str(destination.parent), exist_ok=True)

    cut_ms = _find_cut_ms(silences)
    if cut_ms is None:
        print('Failed to find the break in: ' + str(source))
        audio.export(str(destination), format='mp3')
        return

    # Keep the tail end of the silence so the fade-in does not eat into
    # the audio that follows the break.
    trimmed = audio[cut_ms - KEEP_SILENCE_MS:].fade_in(FADE_IN_MS)
    trimmed.export(str(destination), format='mp3')


def main() -> None:
    """Process every MP3 below the directory given as the first argument."""
    if len(sys.argv) < 2:
        sys.exit('usage: ' + os.path.basename(sys.argv[0]) + ' DIRECTORY')

    root = Path(sys.argv[1])
    out_root = root.parent / OUTPUT_DIR_NAME

    # Materialise the file list before exporting anything: when DIRECTORY is
    # '.', its parent is also '.', so the output tree lies inside the tree
    # being walked and a lazy rglob could pick up freshly written files.
    sources = sorted(root.rglob('*.mp3'))

    for source in sources:
        # relative_to() is robust to how DIRECTORY was spelled ('./x', 'x/'),
        # unlike slicing the string by the length of the raw argument.
        _strip_file(source, out_root / source.relative_to(root))


if __name__ == '__main__':
    main()