Spaces:
Running
on
Zero
Running
on
Zero
liuyang
committed on
Commit
·
9f7c374
1
Parent(s):
d947708
update log, no vad
Browse files
app.py
CHANGED
|
@@ -369,7 +369,7 @@ class WhisperTranscriber:
|
|
| 369 |
options = dict(
|
| 370 |
language=language,
|
| 371 |
beam_size=5,
|
| 372 |
-
vad_filter=
|
| 373 |
vad_parameters=VadOptions(
|
| 374 |
max_speech_duration_s=whisper.feature_extractor.chunk_length,
|
| 375 |
min_speech_duration_ms=100,
|
|
@@ -392,6 +392,7 @@ class WhisperTranscriber:
|
|
| 392 |
segments = list(segments)
|
| 393 |
|
| 394 |
detected_language = transcript_info.language
|
|
|
|
| 395 |
|
| 396 |
# Process segments
|
| 397 |
results = []
|
|
@@ -420,7 +421,7 @@ class WhisperTranscriber:
|
|
| 420 |
|
| 421 |
transcription_time = time.time() - start_time
|
| 422 |
print(f"Full audio transcribed in {transcription_time:.2f} seconds using batch size {batch_size}")
|
| 423 |
-
print(results)
|
| 424 |
return results, detected_language
|
| 425 |
|
| 426 |
# Removed audio cutting; transcription is done once on the full (preprocessed) audio
|
|
@@ -474,7 +475,7 @@ class WhisperTranscriber:
|
|
| 474 |
# Convert to list format
|
| 475 |
diarize_segments = []
|
| 476 |
diarization_list = list(diarization.itertracks(yield_label=True))
|
| 477 |
-
print(diarization_list)
|
| 478 |
for turn, _, speaker in diarization_list:
|
| 479 |
diarize_segments.append({
|
| 480 |
"start": float(turn.start) + float(base_offset_s),
|
|
|
|
| 369 |
options = dict(
|
| 370 |
language=language,
|
| 371 |
beam_size=5,
|
| 372 |
+
vad_filter=False, # VAD is enabled by default for batched transcription
|
| 373 |
vad_parameters=VadOptions(
|
| 374 |
max_speech_duration_s=whisper.feature_extractor.chunk_length,
|
| 375 |
min_speech_duration_ms=100,
|
|
|
|
| 392 |
segments = list(segments)
|
| 393 |
|
| 394 |
detected_language = transcript_info.language
|
| 395 |
+
print("Detected language: ", detected_language, "segments: ", len(segments))
|
| 396 |
|
| 397 |
# Process segments
|
| 398 |
results = []
|
|
|
|
| 421 |
|
| 422 |
transcription_time = time.time() - start_time
|
| 423 |
print(f"Full audio transcribed in {transcription_time:.2f} seconds using batch size {batch_size}")
|
| 424 |
+
#print(results)
|
| 425 |
return results, detected_language
|
| 426 |
|
| 427 |
# Removed audio cutting; transcription is done once on the full (preprocessed) audio
|
|
|
|
| 475 |
# Convert to list format
|
| 476 |
diarize_segments = []
|
| 477 |
diarization_list = list(diarization.itertracks(yield_label=True))
|
| 478 |
+
#print(diarization_list)
|
| 479 |
for turn, _, speaker in diarization_list:
|
| 480 |
diarize_segments.append({
|
| 481 |
"start": float(turn.start) + float(base_offset_s),
|