Spaces:
				
			
			
	
			
			
		Build error
		
	
	
	
			
			
	
	
	
	
		
		
		Build error
		
	Commit 
							
							·
						
						12c4d09
	
1
								Parent(s):
							
							61f8fb4
								
Properly restricted audio length on Spaces. (The Space isn't built for song covers; use Colab or a local install for those.)
Browse files
    	
        app.py
    CHANGED
    
    | @@ -28,6 +28,7 @@ from config import Config | |
| 28 | 
             
            config = Config()
         | 
| 29 | 
             
            logging.getLogger("numba").setLevel(logging.WARNING)
         | 
| 30 | 
             
            limitation = os.getenv("SYSTEM") == "spaces"
         | 
|  | |
| 31 |  | 
| 32 | 
             
            audio_mode = []
         | 
| 33 | 
             
            f0method_mode = ["pm", "crepe", "harvest"]
         | 
| @@ -50,7 +51,7 @@ def infer(name, path, index, vc_audio_mode, vc_input, vc_upload, tts_text, tts_v | |
| 50 | 
             
                            return "Please upload an audio file.", None
         | 
| 51 | 
             
                        sampling_rate, audio = vc_upload
         | 
| 52 | 
             
                        duration = audio.shape[0] / sampling_rate
         | 
| 53 | 
            -
                        if duration >  | 
| 54 | 
             
                            return "Too long! Please upload an audio file that is less than 1 minute.", None
         | 
| 55 | 
             
                        audio = (audio / np.iinfo(audio.dtype).max).astype(np.float32)
         | 
| 56 | 
             
                        if len(audio.shape) > 1:
         | 
| @@ -58,12 +59,15 @@ def infer(name, path, index, vc_audio_mode, vc_input, vc_upload, tts_text, tts_v | |
| 58 | 
             
                        if sampling_rate != 16000:
         | 
| 59 | 
             
                            audio = librosa.resample(audio, orig_sr=sampling_rate, target_sr=16000)
         | 
| 60 | 
             
                    elif vc_audio_mode == "TTS Audio":
         | 
| 61 | 
            -
                        if len(tts_text) >  | 
| 62 | 
             
                            return "Text is too long.", None
         | 
| 63 | 
             
                        if tts_text is None or tts_voice is None:
         | 
| 64 | 
             
                            return "You need to enter text and select a voice.", None
         | 
| 65 | 
             
                        asyncio.run(edge_tts.Communicate(tts_text, "-".join(tts_voice.split('-')[:-1])).save("tts.mp3"))
         | 
| 66 | 
             
                        audio, sr = librosa.load("tts.mp3", sr=16000, mono=True)
         | 
|  | |
|  | |
|  | |
| 67 | 
             
                        vc_input = "tts.mp3"
         | 
| 68 | 
             
                    times = [0, 0, 0]
         | 
| 69 | 
             
                    f0_up_key = int(f0_up_key)
         | 
| @@ -379,7 +383,7 @@ if __name__ == '__main__': | |
| 379 | 
             
                                                vc_inst_preview = gr.Audio(label="Instrumental Preview", visible=False)
         | 
| 380 | 
             
                                                vc_audio_preview = gr.Audio(label="Audio Preview", visible=False)
         | 
| 381 | 
             
                                                # TTS
         | 
| 382 | 
            -
                                                tts_text = gr.Textbox(visible=True, label="TTS text", info="Text to speech input", interactive=True)
         | 
| 383 | 
             
                                                tts_voice = gr.Dropdown(label="Edge-tts speaker", choices=voices, visible=True, allow_custom_value=False, value="en-US-AnaNeural-Female", interactive=True)
         | 
| 384 | 
             
                                            with gr.Column():
         | 
| 385 | 
             
                                                vc_transform0 = gr.Number(label="Transpose", value=0, info='Type "12" to change from male to female voice. Type "-12" to change female to male voice')
         | 
|  | |
| 28 | 
             
            config = Config()
         | 
| 29 | 
             
            logging.getLogger("numba").setLevel(logging.WARNING)
         | 
| 30 | 
             
            limitation = os.getenv("SYSTEM") == "spaces"
         | 
| 31 | 
            +
            #limitation=True
         | 
| 32 |  | 
| 33 | 
             
            audio_mode = []
         | 
| 34 | 
             
            f0method_mode = ["pm", "crepe", "harvest"]
         | 
|  | |
| 51 | 
             
                            return "Please upload an audio file.", None
         | 
| 52 | 
             
                        sampling_rate, audio = vc_upload
         | 
| 53 | 
             
                        duration = audio.shape[0] / sampling_rate
         | 
| 54 | 
            +
                        if duration > 60 and limitation:
         | 
| 55 | 
             
                            return "Too long! Please upload an audio file that is less than 1 minute.", None
         | 
| 56 | 
             
                        audio = (audio / np.iinfo(audio.dtype).max).astype(np.float32)
         | 
| 57 | 
             
                        if len(audio.shape) > 1:
         | 
|  | |
| 59 | 
             
                        if sampling_rate != 16000:
         | 
| 60 | 
             
                            audio = librosa.resample(audio, orig_sr=sampling_rate, target_sr=16000)
         | 
| 61 | 
             
                    elif vc_audio_mode == "TTS Audio":
         | 
| 62 | 
            +
                        if len(tts_text) > 250 and limitation:
         | 
| 63 | 
             
                            return "Text is too long.", None
         | 
| 64 | 
             
                        if tts_text is None or tts_voice is None:
         | 
| 65 | 
             
                            return "You need to enter text and select a voice.", None
         | 
| 66 | 
             
                        asyncio.run(edge_tts.Communicate(tts_text, "-".join(tts_voice.split('-')[:-1])).save("tts.mp3"))
         | 
| 67 | 
             
                        audio, sr = librosa.load("tts.mp3", sr=16000, mono=True)
         | 
| 68 | 
            +
                        duration = audio.shape[0] / sr
         | 
| 69 | 
            +
                        if duration > 30 and limitation:
         | 
| 70 | 
            +
                            return "Your text generated an audio that was too long.", None
         | 
| 71 | 
             
                        vc_input = "tts.mp3"
         | 
| 72 | 
             
                    times = [0, 0, 0]
         | 
| 73 | 
             
                    f0_up_key = int(f0_up_key)
         | 
|  | |
| 383 | 
             
                                                vc_inst_preview = gr.Audio(label="Instrumental Preview", visible=False)
         | 
| 384 | 
             
                                                vc_audio_preview = gr.Audio(label="Audio Preview", visible=False)
         | 
| 385 | 
             
                                                # TTS
         | 
| 386 | 
            +
                                                tts_text = gr.Textbox(visible=True, label="TTS text", info="Text to speech input (There is a limit of 250 characters)", interactive=True)
         | 
| 387 | 
             
                                                tts_voice = gr.Dropdown(label="Edge-tts speaker", choices=voices, visible=True, allow_custom_value=False, value="en-US-AnaNeural-Female", interactive=True)
         | 
| 388 | 
             
                                            with gr.Column():
         | 
| 389 | 
             
                                                vc_transform0 = gr.Number(label="Transpose", value=0, info='Type "12" to change from male to female voice. Type "-12" to change female to male voice')
         | 
