Spaces: Running on Zero
Update app.py
Browse files
app.py
CHANGED
@@ -60,8 +60,8 @@ print(f"Using device: {device}, dtype: {dtype}")
|
|
60 |
|
61 |
pipe = pipeline(
|
62 |
"automatic-speech-recognition",
|
63 |
-
model="openai/whisper-
|
64 |
-
torch_dtype=torch.
|
65 |
device=device,
|
66 |
)
|
67 |
#vocos = Vocos.from_pretrained("charactr/vocos-mel-24khz")
|
@@ -332,7 +332,13 @@ def infer(ref_audio_orig, ref_text, gen_text, exp_name, remove_silence, cross_fa
|
|
332 |
|
333 |
if not ref_text.strip():
|
334 |
gr.Info("No reference text provided, transcribing reference audio...")
|
335 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
336 |
gr.Info("Finished transcription")
|
337 |
else:
|
338 |
gr.Info("Using custom reference text...")
|
|
|
60 |
|
61 |
pipe = pipeline(
|
62 |
"automatic-speech-recognition",
|
63 |
+
model="openai/whisper-medium-v3-turbo",
|
64 |
+
torch_dtype=torch.float16,
|
65 |
device=device,
|
66 |
)
|
67 |
#vocos = Vocos.from_pretrained("charactr/vocos-mel-24khz")
|
|
|
332 |
|
333 |
if not ref_text.strip():
|
334 |
gr.Info("No reference text provided, transcribing reference audio...")
|
335 |
+
ref_text = outputs = pipe(
|
336 |
+
ref_audio,
|
337 |
+
chunk_length_s=30,
|
338 |
+
batch_size=128,
|
339 |
+
generate_kwargs={"task": "transcribe"},
|
340 |
+
return_timestamps=False,
|
341 |
+
)['text'].strip()
|
342 |
gr.Info("Finished transcription")
|
343 |
else:
|
344 |
gr.Info("Using custom reference text...")
|