Spaces:
				
			
			
	
			
			
		Sleeping
		
	
	
	
			
			
	
	
	
	
		
		
		Sleeping
		
	Update app.py
Browse files
    	
        app.py
    CHANGED
    
    | @@ -2,7 +2,7 @@ import torch | |
| 2 | 
             
            import os
         | 
| 3 | 
             
            import random
         | 
| 4 | 
             
            import gradio as gr
         | 
| 5 | 
            -
             | 
| 6 | 
             
            from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5HifiGan, pipeline
         | 
| 7 | 
             
            import base64
         | 
| 8 | 
             
            from datasets import load_dataset
         | 
| @@ -26,18 +26,13 @@ def guessanAge(model, image): | |
| 26 | 
             
                return description    
         | 
| 27 |  | 
| 28 | 
             
            @spaces.GPU(duration=120)
         | 
| 29 | 
            -
            def text2speech( | 
|  | |
| 30 | 
             
                print(voice)
         | 
| 31 | 
             
                if len(text) > 0:
         | 
| 32 | 
            -
                     | 
| 33 | 
            -
                    
         | 
| 34 | 
            -
                     | 
| 35 | 
            -
                    speaker_embedding = torch.tensor(embeddings_dataset[voice]["xvector"]).unsqueeze(0)
         | 
| 36 | 
            -
                    
         | 
| 37 | 
            -
                    speech = synthesiser(text, forward_params={"speaker_embeddings": speaker_embedding})
         | 
| 38 | 
            -
                    audio_data = np.frombuffer(speech["audio"], dtype=np.float32)
         | 
| 39 | 
            -
                    audio_data_16bit = (audio_data * 32767).astype(np.int16)
         | 
| 40 | 
            -
                    return speech["sampling_rate"], audio_data_16bit
         | 
| 41 |  | 
| 42 | 
             
            @spaces.GPU
         | 
| 43 | 
             
            def ImageGenFromText(text, model):
         | 
| @@ -77,11 +72,9 @@ tab2 = gr.Interface( | |
| 77 | 
             
                outputs=["text"],
         | 
| 78 | 
             
            )
         | 
| 79 | 
             
            textbox = gr.Textbox(value="good morning pineapple! looking very good very nice!")
         | 
| 80 | 
            -
            radio3 = gr.Radio(["microsoft/speecht5_tts"], value="microsoft/speecht5_tts", label="Select an tts", info="Age Classifier")
         | 
| 81 | 
            -
            radio3_1 = gr.Radio([("Scottish male (awb)", 0), ("US male (bdl)", 1138), ("US female (clb)", 2271), ("Canadian male (jmk)",3403), ("Indian male (ksp)", 4535), ("US male (rms)", 5667), ("US female (slt)", 6799)], value=4535)
         | 
| 82 | 
             
            tab3 = gr.Interface(
         | 
| 83 | 
             
                fn=text2speech,
         | 
| 84 | 
            -
                inputs=[ | 
| 85 | 
             
                outputs=["audio"],
         | 
| 86 | 
             
            )
         | 
| 87 |  | 
|  | |
| 2 | 
             
            import os
         | 
| 3 | 
             
            import random
         | 
| 4 | 
             
            import gradio as gr
         | 
| 5 | 
            +
            from TTS.api import TTS
         | 
| 6 | 
             
            from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5HifiGan, pipeline
         | 
| 7 | 
             
            import base64
         | 
| 8 | 
             
            from datasets import load_dataset
         | 
|  | |
| 26 | 
             
                return description    
         | 
| 27 |  | 
| 28 | 
             
            @spaces.GPU(duration=120)
         | 
| 29 | 
            +
            def text2speech(text, sample):
         | 
| 30 | 
            +
                print(TTS().list_models())
         | 
| 31 | 
             
                print(voice)
         | 
| 32 | 
             
                if len(text) > 0:
         | 
| 33 | 
            +
                    tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2")
         | 
| 34 | 
            +
                    wav = tts.tts(text="Hello world!", speaker_wav=sample, language="en")
         | 
| 35 | 
            +
                    return wav
         | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 36 |  | 
| 37 | 
             
            @spaces.GPU
         | 
| 38 | 
             
            def ImageGenFromText(text, model):
         | 
|  | |
| 72 | 
             
                outputs=["text"],
         | 
| 73 | 
             
            )
         | 
| 74 | 
             
            textbox = gr.Textbox(value="good morning pineapple! looking very good very nice!")
         | 
|  | |
|  | |
| 75 | 
             
            tab3 = gr.Interface(
         | 
| 76 | 
             
                fn=text2speech,
         | 
| 77 | 
            +
                inputs=[textbox, "microphone"],
         | 
| 78 | 
             
                outputs=["audio"],
         | 
| 79 | 
             
            )
         | 
| 80 |  |