liuyang committed on
Commit
9f7c374
·
1 Parent(s): d947708

update log, no vad

Browse files
Files changed (1) hide show
  1. app.py +4 -3
app.py CHANGED
@@ -369,7 +369,7 @@ class WhisperTranscriber:
369
  options = dict(
370
  language=language,
371
  beam_size=5,
372
- vad_filter=True, # VAD is enabled by default for batched transcription
373
  vad_parameters=VadOptions(
374
  max_speech_duration_s=whisper.feature_extractor.chunk_length,
375
  min_speech_duration_ms=100,
@@ -392,6 +392,7 @@ class WhisperTranscriber:
392
  segments = list(segments)
393
 
394
  detected_language = transcript_info.language
 
395
 
396
  # Process segments
397
  results = []
@@ -420,7 +421,7 @@ class WhisperTranscriber:
420
 
421
  transcription_time = time.time() - start_time
422
  print(f"Full audio transcribed in {transcription_time:.2f} seconds using batch size {batch_size}")
423
- print(results)
424
  return results, detected_language
425
 
426
  # Removed audio cutting; transcription is done once on the full (preprocessed) audio
@@ -474,7 +475,7 @@ class WhisperTranscriber:
474
  # Convert to list format
475
  diarize_segments = []
476
  diarization_list = list(diarization.itertracks(yield_label=True))
477
- print(diarization_list)
478
  for turn, _, speaker in diarization_list:
479
  diarize_segments.append({
480
  "start": float(turn.start) + float(base_offset_s),
 
369
  options = dict(
370
  language=language,
371
  beam_size=5,
372
+ vad_filter=False, # VAD is enabled by default for batched transcription
373
  vad_parameters=VadOptions(
374
  max_speech_duration_s=whisper.feature_extractor.chunk_length,
375
  min_speech_duration_ms=100,
 
392
  segments = list(segments)
393
 
394
  detected_language = transcript_info.language
395
+ print("Detected language: ", detected_language, "segments: ", len(segments))
396
 
397
  # Process segments
398
  results = []
 
421
 
422
  transcription_time = time.time() - start_time
423
  print(f"Full audio transcribed in {transcription_time:.2f} seconds using batch size {batch_size}")
424
+ #print(results)
425
  return results, detected_language
426
 
427
  # Removed audio cutting; transcription is done once on the full (preprocessed) audio
 
475
  # Convert to list format
476
  diarize_segments = []
477
  diarization_list = list(diarization.itertracks(yield_label=True))
478
+ #print(diarization_list)
479
  for turn, _, speaker in diarization_list:
480
  diarize_segments.append({
481
  "start": float(turn.start) + float(base_offset_s),