Spaces:

Emova-ollm/EMOVA-demo

Running on Zero

App Files Community

KaiChen1998 committed on Nov 5, 2024

Commit

91deaa2

1 Parent(s): 7925e97

change time limit

Browse files

Files changed (1) hide show

app.py +3 -3

app.py CHANGED Viewed

@@ -56,11 +56,11 @@ asr_format = "Please recognize the text corresponding to the follwing speech.\n"
 tts_format = "Please synthesize the speech corresponding to the follwing text.\n"
 chat_format = r'Please recognize the texts, emotion and pitch from the user question speech units and provide the texts, emotion, pitch and speech units for the assistant response. \nEmotion should be chosen from ["neutral", "happy", "sad", "angry", "surprised", "disgusted", "fearful"]. \nPitch should be chosen from ["low", "normal", "high"].\nYour output should be in json format.\nAn output example is:\n{"user question text": "", "user question emotion": "", "user question pitch": "", "assistant response text": "", "assistant response emotion": "", "assistant response pitch": ""，"assistant response speech": ""}\n\nuser question speech:'
-@spaces.GPU(duration=15)
 def s2u_asr(text, audio_file):
     return asr_format + s2u_extract_unit_demo(s2u_model, audio_file, model_name=s2u_model_name, reduced=reduced)
-@spaces.GPU(duration=15)
 def s2u_chat(text, audio_file):
     return chat_format + s2u_extract_unit_demo(s2u_model, audio_file, model_name=s2u_model_name, reduced=reduced)
@@ -192,7 +192,7 @@ def add_text(state, text, image, image_process_mode, audio_input, audio_mode):
 # Input: [state, temperature, top_p, max_output_tokens, speaker]
 # Return: [state, chatbot] + btn_list
 ############
-@spaces.GPU(duration=90)
 def http_bot(state, temperature, top_p, max_new_tokens, speaker):
     logging.info(f"http_bot.")

 tts_format = "Please synthesize the speech corresponding to the follwing text.\n"
 chat_format = r'Please recognize the texts, emotion and pitch from the user question speech units and provide the texts, emotion, pitch and speech units for the assistant response. \nEmotion should be chosen from ["neutral", "happy", "sad", "angry", "surprised", "disgusted", "fearful"]. \nPitch should be chosen from ["low", "normal", "high"].\nYour output should be in json format.\nAn output example is:\n{"user question text": "", "user question emotion": "", "user question pitch": "", "assistant response text": "", "assistant response emotion": "", "assistant response pitch": ""，"assistant response speech": ""}\n\nuser question speech:'
+@spaces.GPU(duration=10)
 def s2u_asr(text, audio_file):
     return asr_format + s2u_extract_unit_demo(s2u_model, audio_file, model_name=s2u_model_name, reduced=reduced)
+@spaces.GPU(duration=10)
 def s2u_chat(text, audio_file):
     return chat_format + s2u_extract_unit_demo(s2u_model, audio_file, model_name=s2u_model_name, reduced=reduced)
 # Input: [state, temperature, top_p, max_output_tokens, speaker]
 # Return: [state, chatbot] + btn_list
 ############
+@spaces.GPU
 def http_bot(state, temperature, top_p, max_new_tokens, speaker):
     logging.info(f"http_bot.")