Create app.py
app.py
ADDED
@@ -0,0 +1,46 @@
+import gradio as gr
+
+# Get models
+# ASR model for the input speech
+speech2text = gr.Interface.load("huggingface/facebook/wav2vec2-base-960h",
+                                inputs=gr.inputs.Audio(label="Record Audio File", type="file", source="microphone"))
+
+# Translation model: English text to Spanish text
+translator = gr.Interface.load("huggingface/Helsinki-NLP/opus-mt-en-es",
+                               outputs=gr.outputs.Textbox(label="English to Spanish Translated Text"))
+# TTS model for the output speech
+text2speech = gr.Interface.load("huggingface/facebook/tts_transformer-es-css10",
+                                outputs=gr.outputs.Audio(label="English to Spanish Translated Audio"),
+                                allow_flagging="never")
+
+
+translate = gr.Series(speech2text, translator)  # outputs the Spanish text translation
+en2es = gr.Series(translate, text2speech)  # outputs the Spanish audio
+ui = gr.Parallel(translate, en2es)  # shows the Spanish text alongside the Spanish audio
+
+# Gradio interface
+ui.title = "English to Spanish Speech Translator"
+ui.description = """<center>A useful tool for translating English speech to Spanish audio. All pre-trained models are from the Hugging Face Hub.</center>"""
+ui.examples = [['ljspeech.wav'], ['ljspeech2.wav']]
+ui.theme = "peach"
+ui.article = """<h2>Pre-trained Model Information</h2>
+<h3>Automatic Speech Recognition</h3>
+<p style='text-align: justify'>The model used for the ASR part of this Space is
+https://huggingface.co/facebook/wav2vec2-base-960h, which is pretrained and fine-tuned on <b>960 hours of
+LibriSpeech</b> 16kHz sampled speech audio. This model has a <b>word error rate (WER)</b> of <b>8.6 percent on
+noisy speech</b> and <b>5.2 percent on clean speech</b> on the standard LibriSpeech benchmark. More information can be
+found at https://ai.facebook.com/blog/wav2vec-20-learning-the-structure-of-speech-from-raw-audio/, and the
+original model is at https://github.com/pytorch/fairseq/tree/main/examples/wav2vec.</p>
+<h3>Text Translator</h3>
+<p style='text-align: justify'>The English-to-Spanish text translation model is
+Helsinki-NLP/opus-mt-en-es, which is part of <b>The Tatoeba Translation Challenge (v2021-08-07)</b>; see its GitHub repo at
+https://github.com/Helsinki-NLP/Tatoeba-Challenge. The project aims to develop machine translation for real-world
+use cases in many languages.</p>
+<h3>Text to Speech</h3>
+<p style='text-align: justify'>The TTS model used is https://huggingface.co/facebook/tts_transformer-es-css10.
+This model uses the <b>Fairseq(-py)</b> sequence modeling toolkit for speech synthesis, in this case specifically TTS
+for Spanish. More information can be found at https://github.com/pytorch/fairseq.</p>
+"""
+
+
+ui.launch(inbrowser=True)
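
As written, app.py targets the older Gradio 2.x API (gr.inputs, gr.outputs, gr.Interface.load, gr.Series, gr.Parallel, and string themes such as "peach"), which later Gradio releases have deprecated or removed, so the Space needs an older Gradio version pinned to run unchanged. For reference, the sketch below shows roughly what the gr.Series(speech2text, translator) chain computes, written directly against the Hugging Face transformers pipeline API instead of Gradio's interface mixing. It is a minimal illustration, not part of the Space: it assumes transformers and torch are installed, omits the fairseq TTS stage (facebook/tts_transformer-es-css10 is normally loaded through fairseq rather than a transformers pipeline), and reuses the example file name from ui.examples.

# Minimal sketch of the ASR -> translation chain (assumes transformers and torch are installed).
from transformers import pipeline

# The same checkpoints the Space loads through gr.Interface.load.
asr = pipeline("automatic-speech-recognition", model="facebook/wav2vec2-base-960h")
en_to_es = pipeline("translation", model="Helsinki-NLP/opus-mt-en-es")

def english_speech_to_spanish_text(audio_path):
    # ASR: English audio file -> English transcript (uppercase and unpunctuated for this checkpoint).
    english_text = asr(audio_path)["text"]
    # MT: English transcript -> Spanish text.
    return en_to_es(english_text)[0]["translation_text"]

print(english_speech_to_spanish_text("ljspeech.wav"))  # example audio file listed in ui.examples

The Gradio Series/Parallel wiring adds the web UI, the microphone input, and the TTS output on top of essentially this function.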