all-in-one

Running

App Files Files Community

helloWorld199 committed on Jun 14, 2024

Commit

b69ba0c

verified ·

1 Parent(s): d5095bd

Update app.py

Browse files

Files changed (1) hide show

app.py +47 -12

app.py CHANGED Viewed

@@ -47,6 +47,22 @@ CACHE_EXAMPLES = os.getenv('CACHE_EXAMPLES', '1') == '1'
 base_dir = "/tmp/gradio/"
 def analyze(path):
   #Measure time for inference
   start = time.time()
@@ -64,6 +80,8 @@ def analyze(path):
     for file_path in files:
       json_structure_output = os.path.join(root, file_path)
       print(json_structure_output)
   fig = allin1.visualize(
     result,
@@ -107,9 +125,14 @@ def analyze(path):
 def add_voice_label(json_file, audio_path):
     # Load the JSON file
-    file_path = 'path_to_your_json_file.json'
-    with open(file_path, 'r') as f:
         data = json.load(f)
     # Access the segments
     segments = data['segments']
@@ -118,18 +141,30 @@ def add_voice_label(json_file, audio_path):
     for segment in segments:
         start = segment['start']
         end = segment['end']
-        audio_segment = get_audio_segment()
-    # Add the "voice" label to each segment. It contains either Yes or No.
-    for segment in segments:
-        segment['voice'] = contains_voice(segment)
-def get_audio_segment(audio_path, ):

 base_dir = "/tmp/gradio/"
+# Sample rate for voice activity detection (Silero VAD accepts 8000 or 16000 Hz; multiples of 16000 such as 32000 are downsampled internally)
+SAMPLING_RATE = 32000
+torch.set_num_threads(1)
+# Import of models to do voice detection
+model, utils = torch.hub.load(repo_or_dir='snakers4/silero-vad',
+                              model='silero_vad',
+                              force_reload=True,
+                              onnx=USE_ONNX)
+(get_speech_timestamps,
+ save_audio,
+ read_audio,
+ VADIterator,
+ collect_chunks) = utils
 def analyze(path):
   #Measure time for inference
   start = time.time()
     for file_path in files:
       json_structure_output = os.path.join(root, file_path)
       print(json_structure_output)
+  add_voice_label(json_structure_output, path)
   fig = allin1.visualize(
     result,
 def add_voice_label(json_file, audio_path):
     """Add a "voice" label ("Yes" or "No") to every segment in ``json_file``.

     The JSON file is expected to hold a top-level ``'segments'`` list whose
     items carry ``'start'`` and ``'end'`` values (presumably seconds -- they
     are multiplied by ``SAMPLING_RATE`` to obtain sample indices). For each
     segment, the audio between its start and end is fed to the voice
     activity detection model in fixed-size windows; the segment is labelled
     "Yes" when the mean speech probability of those windows is at least 0.7
     and "No" otherwise. The file is rewritten in place with the new labels.

     Args:
         json_file: Path of the structure JSON file to read and overwrite.
         audio_path: Path of the audio file the segments refer to.
     """
     # Load the JSON file
     with open(json_file, 'r', encoding='utf-8') as f:
         data = json.load(f)
     # Create VAD object (used to reset the model state between segments)
     vad_iterator = VADIterator(model)
     # Read input audio file
     wav = read_audio(audio_path, sampling_rate=SAMPLING_RATE)
     total_samples = len(wav)
     window_size_samples = 1536
     # Access the segments
     segments = data['segments']
     for segment in segments:
         # Clamp the segment to the audio actually available.
         start_sample = max(0, int(segment['start'] * SAMPLING_RATE))
         end_sample = min(total_samples, int(segment['end'] * SAMPLING_RATE))
         speech_probs = []
         # Score only the full windows that lie inside THIS segment. Scanning
         # the whole recording here would give every segment the same label.
         last_window_start = end_sample - window_size_samples
         for i in range(start_sample, last_window_start + 1, window_size_samples):
             chunk = wav[i:i + window_size_samples]
             speech_probs.append(model(chunk, SAMPLING_RATE).item())
         vad_iterator.reset_states()  # reset model states after each segment
         # A segment shorter than one window yields no probabilities; treat
         # it as "no voice" instead of taking the mean of an empty list (NaN).
         mean_probability = float(np.mean(speech_probs)) if speech_probs else 0.0
         print(mean_probability)
         segment['voice'] = "Yes" if mean_probability >= 0.7 else "No"
     with open(json_file, 'w') as f:
         json.dump(data, f, indent=4)