Spaces:
				
			
			
	
			
			
		Runtime error
		
	
	
	
			
			
	
	
	
	
		
		
		Runtime error
		
	File size: 3,773 Bytes
			
			| feba911 c52add3 36e373b 40817ec f794f86 c52add3 feba911 c52add3 feba911 c52add3 feba911 2ca0f71 933b458 feba911 33b8d42 d3cc82a db5c8d0 a95d76f d3cc82a 33b8d42 241ba79 d3cc82a 8ce4e5c d3cc82a 8ce4e5c 33b8d42 a95d76f d3cc82a 33b8d42 241ba79 d3cc82a 33b8d42 933b458 feba911 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 | import gradio as gr
# Load the hosted model(s).
# Speech recognition (ASR) for the spoken input: the interface is loaded from
# the Hugging Face hub entry named below, with an audio input configured with
# source="microphone" and type="filepath".
ui = gr.Interface.load(
    "huggingface/facebook/hubert-large-ls960-ft",
    inputs=gr.inputs.Audio(
        source="microphone",
        type="filepath",
        label="Record Audio",
    ),
)
                                     
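# NOTE: the translation and text-to-speech stages below are commented out, so only the ASR model loaded above is served.
#translates English to Spanish text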
#translator = gr.Interface.load("huggingface/Helsinki-NLP/opus-mt-en-es",
#                                outputs=gr.outputs.Textbox(label="English to Spanish Translated Text"))
#TTS model for output speech                                
#text2speech = gr.Interface.load("huggingface/facebook/tts_transformer-es-css10",
#                                outputs=gr.outputs.Audio(label="English to Spanish Translated Audio"),
#                                allow_flagging="never")
                                
#ui = gr.Series(speech2text, translator) #outputs Spanish text translation
#en2es = gr.Series(translate, text2speech) #outputs Spanish audio
#ui = gr.Parallel(translate, en2es) #allows transcription of Spanish audio
# Gradio interface: presentation settings (title, description, example inputs,
# flagging, theme and the explanatory article shown with the widget).
# NOTE(review): these are assigned as attributes after the interface object has
# been created -- confirm the installed Gradio version still honours them.
ui.title = "English to Spanish Speech Translator"
ui.description = """<center>A useful tool in translating English to Spanish audio. All pre-trained models are found in huggingface.</center>"""
# Example inputs are referenced by relative file name.
ui.examples = [['ljspeech.wav'],['ljspeech2.wav'], ['longspeech.wav']]
ui.allow_flagging = "never"
ui.theme = "peach"
# HTML article describing the pre-trained models. Every href value (and each
# model id used as link text) is kept on a single source line: wrapping one
# across lines puts the line break and the indentation inside the attribute
# value, which turns into spaces in the URL and breaks the link.
ui.article = """<h2>Pre-trained model Information</h2>
                <h3>Automatic Speech Recognition</h3>
                <p style='text-align: justify'>The model used for the ASR part of this space is from                
                <a href=\"https://huggingface.co/facebook/hubert-large-ls960-ft">hubert-large-ls960-ft</a> which is pretrained and fine-tuned on <b>960 hours of 
                Librispeech</b> on 16kHz sampled speech audio. This model has a self-reported <b>word error rate (WER)</b> of <b>1.9 
                percent</b> and ranks first in <i>paperswithcode</i> for ASR on Librispeech. More information can be 
                found on its website at <a href=\"https://ai.facebook.com/blog/hubert-self-supervised-representation-learning-for-speech-recognition-generation-and-compression">hubert-self</a> and 
                original model is under <a href=\"https://github.com/pytorch/fairseq/tree/main/examples/hubert">pytorch/fairseq</a>.</p>
                <h3>Text Translator</h3>
                <p style='text-align: justify'>The English to Spanish text translator pre-trained model is from 
                <a href=\"https://huggingface.co/Helsinki-NLP/opus-mt-en-es">Helsinki-NLP/opus-mt-en-es</a> which is part of the <b>The 
                Tatoeba Translation Challenge 
                (v2021-08-07)</b> as seen from its github repo at 
                <a href=\"https://github.com/Helsinki-NLP/Tatoeba-Challenge">Helsinki-NLP/Tatoeba-Challenge</a>. This project aims to develop 
                machine 
                translation in real-world 
                cases for many languages. </p>
                <h3>Text to Speech</h3>
                <p style='text-align: justify'> The TTS model used is from <a href=\"https://huggingface.co/facebook/tts_transformer-es-css10">facebook/tts_transformer-es-css10</a>. 
                This model uses the <b>Fairseq(-py)</b> sequence modeling toolkit for speech synthesis, in this case, specifically TTS 
                for Spanish. More information can be seen on their git at 
                <a href=\"https://github.com/pytorch/fairseq/tree/main/examples/speech_synthesis">speech_synthesis</a>. </p>
            """
                                             
# Start serving the interface. inbrowser=True is Gradio's launch() option for
# opening the UI in a browser tab once the server is up.
ui.launch(inbrowser=True)
 |