Spaces:

Mohssinibra
/

STTDARIJAAPI

Running

App Files Files Community

Mohssinibra committed on Feb 7

Commit

d04bf8d

verified ·

1 Parent(s): d0830a2

../

Browse files

Files changed (1) hide show

app.py +25 -5

app.py CHANGED Viewed

@@ -1,12 +1,16 @@
 import gradio as gr
 import librosa
 import torch
-from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
 # Load pre-trained model and processor directly from Hugging Face Hub
 model = Wav2Vec2ForCTC.from_pretrained("boumehdi/wav2vec2-large-xlsr-moroccan-darija")
 processor = Wav2Vec2Processor.from_pretrained("boumehdi/wav2vec2-large-xlsr-moroccan-darija")
 def transcribe_audio(audio):
     # Load the audio file from Gradio interface
     audio_array, sr = librosa.load(audio, sr=16000)
@@ -20,15 +24,31 @@ def transcribe_audio(audio):
     # Get the predicted tokens
     tokens = torch.argmax(logits, axis=-1)
-    # Decode the tokens into text
     transcription = processor.decode(tokens[0])
-    return transcription
 # Create a Gradio interface for uploading audio or recording from the browser
 demo = gr.Interface(fn=transcribe_audio,
                     inputs=gr.Audio(type="filepath"),  # Corrected input component
-                    outputs="text")
 demo.launch()
-demo.launch(api=True,share=True)

 import gradio as gr
 import librosa
 import torch
+from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor, MBartForConditionalGeneration, MBart50Tokenizer
 # Load pre-trained model and processor directly from Hugging Face Hub
 model = Wav2Vec2ForCTC.from_pretrained("boumehdi/wav2vec2-large-xlsr-moroccan-darija")
 processor = Wav2Vec2Processor.from_pretrained("boumehdi/wav2vec2-large-xlsr-moroccan-darija")
+# Load translation model
+translation_model = MBartForConditionalGeneration.from_pretrained("facebook/mbart-large-50-many-to-many-mmt")
+translation_tokenizer = MBart50Tokenizer.from_pretrained("facebook/mbart-large-50-many-to-many-mmt", src_lang="ar")
 def transcribe_audio(audio):
     # Load the audio file from Gradio interface
     audio_array, sr = librosa.load(audio, sr=16000)
     # Get the predicted tokens
     tokens = torch.argmax(logits, axis=-1)
+    # Decode the tokens into text (Darija transcription)
     transcription = processor.decode(tokens[0])
+    # Translate the transcription to English
+    translation = translate_text(transcription)
+    return transcription, translation
+def translate_text(text):
+    # Tokenize the text to translate
+    inputs = translation_tokenizer(text, return_tensors="pt")
+    # Generate translated tokens (from Darija to English)
+    translated_tokens = translation_model.generate(**inputs, forced_bos_token_id=translation_tokenizer.lang_code_to_id["en"])
+    # Decode the translated tokens into text
+    translated_text = translation_tokenizer.batch_decode(translated_tokens, skip_special_tokens=True)[0]
+    return translated_text
 # Create a Gradio interface for uploading audio or recording from the browser
 demo = gr.Interface(fn=transcribe_audio,
                     inputs=gr.Audio(type="filepath"),  # Corrected input component
+                    outputs=["text", "text"],  # Both transcription and translation outputs
+                    live=True)
 demo.launch()
+demo.launch(api=True, share=True)