Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -47,6 +47,22 @@ CACHE_EXAMPLES = os.getenv('CACHE_EXAMPLES', '1') == '1'
|
|
47 |
|
48 |
base_dir = "/tmp/gradio/"
|
49 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
50 |
def analyze(path):
|
51 |
#Measure time for inference
|
52 |
start = time.time()
|
@@ -64,6 +80,8 @@ def analyze(path):
|
|
64 |
for file_path in files:
|
65 |
json_structure_output = os.path.join(root, file_path)
|
66 |
print(json_structure_output)
|
|
|
|
|
67 |
|
68 |
fig = allin1.visualize(
|
69 |
result,
|
@@ -107,9 +125,14 @@ def analyze(path):
|
|
107 |
|
108 |
def add_voice_label(json_file, audio_path):
|
109 |
# Load the JSON file
|
110 |
-
|
111 |
-
with open(file_path, 'r') as f:
|
112 |
data = json.load(f)
|
|
|
|
|
|
|
|
|
|
|
|
|
113 |
|
114 |
# Access the segments
|
115 |
segments = data['segments']
|
@@ -118,18 +141,30 @@ def add_voice_label(json_file, audio_path):
|
|
118 |
for segment in segments:
|
119 |
start = segment['start']
|
120 |
end = segment['end']
|
121 |
-
|
122 |
-
audio_segment = get_audio_segment()
|
123 |
|
|
|
|
|
124 |
|
125 |
-
|
126 |
-
|
127 |
-
|
128 |
-
|
129 |
-
|
130 |
-
|
131 |
-
|
132 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
133 |
|
134 |
|
135 |
|
|
|
47 |
|
48 |
base_dir = "/tmp/gradio/"
|
49 |
|
50 |
+
# Sample rate for voice activity detection (Silero VAD accepts 8000 Hz, 16000 Hz, or an integer multiple of 16000 Hz)
|
51 |
+
SAMPLING_RATE = 32000
|
52 |
+
torch.set_num_threads(1)
|
53 |
+
|
54 |
+
# Load the Silero VAD model and its helper utilities via torch.hub
|
55 |
+
model, utils = torch.hub.load(repo_or_dir='snakers4/silero-vad',
|
56 |
+
model='silero_vad',
|
57 |
+
force_reload=True,
|
58 |
+
onnx=USE_ONNX)
|
59 |
+
|
60 |
+
(get_speech_timestamps,
|
61 |
+
save_audio,
|
62 |
+
read_audio,
|
63 |
+
VADIterator,
|
64 |
+
collect_chunks) = utils
|
65 |
+
|
66 |
def analyze(path):
|
67 |
#Measure time for inference
|
68 |
start = time.time()
|
|
|
80 |
for file_path in files:
|
81 |
json_structure_output = os.path.join(root, file_path)
|
82 |
print(json_structure_output)
|
83 |
+
|
84 |
+
add_voice_label(json_structure_output, path)
|
85 |
|
86 |
fig = allin1.visualize(
|
87 |
result,
|
|
|
125 |
|
126 |
def add_voice_label(json_file, audio_path):
|
127 |
# Load the JSON file
|
128 |
+
with open(json_file, 'r') as f:
|
|
|
129 |
data = json.load(f)
|
130 |
+
|
131 |
+
# Create VAD object
|
132 |
+
vad_iterator = VADIterator(model)
|
133 |
+
|
134 |
+
# Read input audio file
|
135 |
+
wav = read_audio(audio_path, sampling_rate=SAMPLING_RATE)
|
136 |
|
137 |
# Access the segments
|
138 |
segments = data['segments']
|
|
|
141 |
for segment in segments:
|
142 |
start = segment['start']
|
143 |
end = segment['end']
|
|
|
|
|
144 |
|
145 |
+
start_sample = int(start*SAMPLING_RATE)
|
146 |
+
end_sample = int(end*SAMPLING_RATE)
|
147 |
|
148 |
+
speech_probs = []
|
149 |
+
window_size_samples = 1536
|
150 |
+
for i in range(0, len(wav), window_size_samples):
|
151 |
+
chunk = wav[i: i+ window_size_samples]
|
152 |
+
if len(chunk) < window_size_samples:
|
153 |
+
break
|
154 |
+
speech_prob = model(chunk, SAMPLING_RATE).item()
|
155 |
+
speech_probs.append(speech_prob)
|
156 |
+
vad_iterator.reset_states() # reset model states after each audio
|
157 |
+
|
158 |
+
mean_probability = np.mean(speech_probs)
|
159 |
+
print(mean_probability)
|
160 |
+
|
161 |
+
if mean_probability >= 0.7 :
|
162 |
+
segment['voice'] = "Yes"
|
163 |
+
else:
|
164 |
+
segment['voice'] = "No"
|
165 |
+
|
166 |
+
with open(json_file, 'w') as f:
|
167 |
+
json.dump(data, f, indent=4)
|
168 |
|
169 |
|
170 |
|