Create Python script to strip out the start of the audio files

2021-05-27 12:52:34 +01:00
parent a7ef272d42
commit 10513f60f4
1 changed file with 96 additions and 0 deletions
--- a/dialectsnipper.py
+++ b/dialectsnipper.py
@@ -0,0 +1,96 @@
+#! /usr/bin/python
+"""Trim the start of every MP3 under a directory at the first long pause.
+
+Usage: dialectsnipper.py DIRECTORY
+
+Each *.mp3 found recursively under DIRECTORY is exported to the same relative
+path inside a 'strippedAccents' directory created next to DIRECTORY.  If a
+qualifying silence is found, the audio before it is dropped and the rest is
+faded in; otherwise a message is printed and the whole file is exported.
+"""
+
+import sys
+import os
+from pathlib import Path
+from pydub import AudioSegment, silence
+
+# Name of the output directory, created next to the input directory.
+OUTPUT_DIR_NAME = 'strippedAccents'
+
+# Arguments passed to pydub's silence.detect_silence(): minimum silence
+# length (ms), silence threshold (dBFS) and seek step (ms).
+MIN_SILENCE_MS = 500
+SILENCE_THRESH_DBFS = -35
+SEEK_STEP_MS = 50
+
+# A detected silence is used as the cut only if it lasts at least
+# MIN_BREAK_MS, starts after BREAK_START_AFTER_MS and ends before
+# BREAK_END_BEFORE_MS (all in milliseconds).
+MIN_BREAK_MS = 600
+BREAK_START_AFTER_MS = 5000
+BREAK_END_BEFORE_MS = 15000
+
+# Amount of the silence kept before the remaining audio, and the length of
+# the fade-in applied to the trimmed result (milliseconds).
+LEAD_IN_MS = 500
+FADE_IN_MS = 400
+
+
+def find_cut_point(silences):
+    """Return the cut position in ms, or None if no silence qualifies.
+
+    silences is a sequence of [start, end] pairs in milliseconds, as
+    returned by silence.detect_silence().  The first qualifying pair wins.
+    """
+    for start, end in silences:
+        if (end - start >= MIN_BREAK_MS and start > BREAK_START_AFTER_MS
+                and end < BREAK_END_BEFORE_MS):
+            return end - LEAD_IN_MS
+    return None
+
+
+def strip_file(path, source_root, output_root):
+    """Export one MP3 to its mirrored path under output_root.
+
+    path is a Path below source_root; the output keeps the same relative
+    location.  The file is trimmed when find_cut_point() finds a cut,
+    otherwise it is exported in full and a message is printed.
+    """
+    audio = AudioSegment.from_mp3(str(path))
+    silences = silence.detect_silence(
+        audio, MIN_SILENCE_MS, SILENCE_THRESH_DBFS, SEEK_STEP_MS)
+
+    # relative_to() rather than slicing by len(directory): Path normalises
+    # its argument (e.g. drops a leading './'), so the string lengths of the
+    # argument and of the globbed paths do not always line up.
+    stripped_path = output_root / path.relative_to(source_root)
+    # exist_ok tolerates an existing directory without hiding real errors
+    # such as missing permissions.
+    os.makedirs(str(stripped_path.parent), exist_ok=True)
+
+    cut = find_cut_point(silences)
+    if cut is None:
+        print('Failed to find the break in: ' + str(path))
+        audio.export(str(stripped_path), format='mp3')
+    else:
+        trimmed = audio[cut:].fade_in(FADE_IN_MS)
+        trimmed.export(str(stripped_path), format='mp3')
+
+
+def main(argv):
+    """Process the directory named by argv[1]; return a process exit code."""
+    if len(argv) < 2:
+        sys.stderr.write('Usage: dialectsnipper.py DIRECTORY\n')
+        return 2
+
+    source_root = Path(argv[1])
+    # abspath() gives '.' (and similar) a real parent, so the output lands
+    # beside the input directory instead of inside it.
+    output_root = Path(os.path.abspath(argv[1])).parent / OUTPUT_DIR_NAME
+    for path in source_root.rglob('*.mp3'):
+        strip_file(path, source_root, output_root)
+    return 0
+
+
+if __name__ == '__main__':
+    sys.exit(main(sys.argv))