Spaces:

Gregniuki
/

f5-tts_Polish_English_German

Running on Zero

Gregniuki committed on Nov 27, 2024

Commit

47167b8

verified ·

1 Parent(s): 1d6e329

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -60,8 +60,8 @@ print(f"Using device: {device}, dtype: {dtype}")
 pipe = pipeline(
     "automatic-speech-recognition",
-    model="openai/whisper-large-v3-turbo",
-    torch_dtype=torch.float32,
     device=device,
 )
 #vocos = Vocos.from_pretrained("charactr/vocos-mel-24khz")
@@ -332,7 +332,13 @@ def infer(ref_audio_orig, ref_text, gen_text, exp_name, remove_silence, cross_fa
     if not ref_text.strip():
         gr.Info("No reference text provided, transcribing reference audio...")
-       # ref_text = gen_text
         gr.Info("Finished transcription")
     else:
         gr.Info("Using custom reference text...")

 pipe = pipeline(
     "automatic-speech-recognition",
+    model="openai/whisper-medium-v3-turbo",
+    torch_dtype=torch.float16,
     device=device,
 )
 #vocos = Vocos.from_pretrained("charactr/vocos-mel-24khz")
     if not ref_text.strip():
         gr.Info("No reference text provided, transcribing reference audio...")
+        ref_text = outputs = pipe(
+            ref_audio,
+            chunk_length_s=30,
+            batch_size=128,
+            generate_kwargs={"task": "transcribe"},
+            return_timestamps=False,
+        )['text'].strip()
         gr.Info("Finished transcription")
     else:
         gr.Info("Using custom reference text...")