Spaces:
Runtime error
Runtime error
Rachid Ammari
committed on
Commit
·
84024ab
1
Parent(s):
f9b0a05
refactored wav2vec models loading
Browse files
app.py
CHANGED
|
@@ -2,9 +2,12 @@ from transformers import pipeline
|
|
| 2 |
import gradio as gr
|
| 3 |
import whisper
|
| 4 |
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
|
|
|
|
|
|
|
|
|
| 8 |
whisper_model = whisper.load_model("base")
|
| 9 |
|
| 10 |
def transcribe_audio(language=None, mic=None, file=None):
|
|
@@ -14,22 +17,11 @@ def transcribe_audio(language=None, mic=None, file=None):
|
|
| 14 |
audio = file
|
| 15 |
else:
|
| 16 |
return "You must either provide a mic recording or a file"
|
| 17 |
-
wav2vec_model =
|
| 18 |
transcription = wav2vec_model(audio)["text"]
|
| 19 |
transcription2 = whisper_model.transcribe(audio, language=language)["text"]
|
| 20 |
return transcription, transcription2
|
| 21 |
|
| 22 |
-
def load_models(lang):
    """Return the wav2vec ASR model for *lang*, falling back to English.

    Recognized codes are 'fr' and 'es'; 'en' and any other value yield
    the English model.
    """
    if lang == 'fr':
        return wav2vec_fr_model
    if lang == 'es':
        return wav2vec_es_model
    # 'en' and every unrecognized code default to the English model.
    return wav2vec_en_model
|
| 32 |
-
|
| 33 |
title = "Speech2text comparison (Wav2vec vs Whisper)"
|
| 34 |
description = """
|
| 35 |
This Space allows easy comparisons for transcribed texts between Facebook's Wav2vec model and newly released OpenAI's Whisper model.\n
|
|
|
|
| 2 |
import gradio as gr
|
| 3 |
import whisper
|
| 4 |
|
| 5 |
+
|
| 6 |
+
# Language code -> Hugging Face model id for each supported wav2vec checkpoint.
_WAV2VEC_MODEL_IDS = {
    "en": "facebook/wav2vec2-base-960h",
    "fr": "facebook/wav2vec2-large-xlsr-53-french",
    "es": "facebook/wav2vec2-large-xlsr-53-spanish",
}

# Eagerly instantiate one ASR pipeline per supported language.
wav2vec_models = {
    code: pipeline("automatic-speech-recognition", model=model_id)
    for code, model_id in _WAV2VEC_MODEL_IDS.items()
}

# Whisper "base" checkpoint used for the side-by-side comparison.
whisper_model = whisper.load_model("base")
|
| 12 |
|
| 13 |
def transcribe_audio(language=None, mic=None, file=None):
|
|
|
|
| 17 |
audio = file
|
| 18 |
else:
|
| 19 |
return "You must either provide a mic recording or a file"
|
| 20 |
+
wav2vec_model = wav2vec_models[language]
|
| 21 |
transcription = wav2vec_model(audio)["text"]
|
| 22 |
transcription2 = whisper_model.transcribe(audio, language=language)["text"]
|
| 23 |
return transcription, transcription2
|
| 24 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
title = "Speech2text comparison (Wav2vec vs Whisper)"
|
| 26 |
description = """
|
| 27 |
This Space allows easy comparisons for transcribed texts between Facebook's Wav2vec model and newly released OpenAI's Whisper model.\n
|