Commit
·
254a63f
1
Parent(s):
d197937
modified app.py
Browse files- .DS_Store +0 -0
- app.py +35 -10
- output/.DS_Store +0 -0
- output/result/.DS_Store +0 -0
.DS_Store
CHANGED
Binary files a/.DS_Store and b/.DS_Store differ
|
|
app.py
CHANGED
@@ -1,20 +1,45 @@
|
|
1 |
import gradio as gr
|
2 |
import subprocess
|
3 |
|
4 |
-
predefined_texts = [
|
|
|
|
|
|
|
|
|
5 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
6 |
|
7 |
-
def synthesize_speech(text, speaker_id):
|
8 |
-
command = f"python3 synthesize.py --text '{text}' --bert_embed 1 --speaker_id {speaker_id} --restore_step 900000 --mode single -p config/EmoV_DB/preprocess.yaml -m config/EmoV_DB/model.yaml -t config/EmoV_DB/train.yaml"
|
9 |
output = subprocess.check_output(command, shell=True)
|
10 |
-
|
11 |
-
audio_file = 'output_file_path'
|
12 |
return audio_file
|
13 |
|
14 |
|
15 |
-
iface = gr.Interface(
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
20 |
iface.launch()
|
|
|
1 |
import gradio as gr
|
2 |
import subprocess
|
3 |
|
4 |
+
predefined_texts = [
|
5 |
+
"A combination of Canadian capital quickly organized and petitioned for the same privileges.",
|
6 |
+
"The date was nearly eighteen years old.",
|
7 |
+
"Hardly were our plans made public before we were met by powerful opposition.",
|
8 |
+
]
|
9 |
|
10 |
+
emotion_mapping = {"amused": 0, "anger": 1,
|
11 |
+
"disgust": 2, "neutral": 3, "sleepiness": 4}
|
12 |
+
|
13 |
+
|
14 |
def synthesize_speech(input_type, example_text, custom_text, speaker_id,
                      embed_type, emotion_id):
    """Run synthesize.py for the chosen text and return the expected wav path.

    Fixes vs. the previous version:
    - The Gradio interface wires up six input widgets, but the old signature
      accepted only five arguments, so every call raised a TypeError.  The
      textbox value is now received as ``custom_text`` (third positional
      input, matching the widget order in ``gr.Interface``).
    - When the user picked "Enter your own text", the old code assigned the
      literal radio label (``input_type``) as the text to synthesize; it now
      uses the textbox content.
    - The command was built by f-string interpolation of user text and run
      with ``shell=True`` (shell injection).  It is now an argv list executed
      without a shell.

    Args:
        input_type: radio choice, "Choose from examples" or "Enter your own text".
        example_text: sentence selected from the dropdown of predefined texts.
        custom_text: free text typed by the user in the textbox.
        speaker_id: integer speaker id from the 0-10 slider.
        embed_type: "bert_embed" or "emotion_id" — selects which conditioning
            flag is passed to synthesize.py.
        emotion_id: emotion id (a value from emotion_mapping); only used when
            embed_type is not "bert_embed".

    Returns:
        Path to the wav file synthesize.py is expected to write.
        NOTE(review): the naming pattern (the text itself as the file stem)
        mirrors the old code — confirm it matches synthesize.py's actual
        output naming.

    Raises:
        subprocess.CalledProcessError: if synthesize.py exits non-zero.
    """
    if input_type == "Choose from examples":
        selected_text = example_text
    else:
        selected_text = custom_text

    # Argv list instead of a shell string: the user-supplied text can no
    # longer inject shell syntax.
    command = [
        "python3", "synthesize.py",
        "--text", selected_text,
        "--speaker_id", str(speaker_id),
        "--restore_step", "900000",
        "--mode", "single",
        "-p", "config/EmoV_DB/preprocess.yaml",
        "-m", "config/EmoV_DB/model.yaml",
        "-t", "config/EmoV_DB/train.yaml",
    ]
    if embed_type == "bert_embed":
        command += ["--bert_embed", "1"]
    else:
        command += ["--emotion_id", str(emotion_id)]

    # check_output raises on failure, so we never return a path to a file
    # that was never written; the captured stdout itself is unused.
    subprocess.check_output(command)

    audio_file = f'output/result/EmoV_DB/{selected_text}.wav'
    return audio_file
|
28 |
|
29 |
|
30 |
# Build the Gradio UI.  Widget order matters: Gradio hands the widget values
# to ``synthesize_speech`` positionally, in the order listed here.
_input_widgets = [
    gr.inputs.Radio(
        ["Choose from examples", "Enter your own text"], label="Input Type"),
    gr.inputs.Dropdown(choices=predefined_texts, label="Select a text"),
    gr.inputs.Textbox(lines=2, label="Enter your own text"),
    gr.inputs.Slider(minimum=0, maximum=10, step=1,
                     default=0, label="Speaker ID"),
    gr.inputs.Radio(["bert_embed", "emotion_id"], label="Embedding Type"),
    gr.inputs.Dropdown(choices=emotion_mapping, label="Select Emotion"),
]

iface = gr.Interface(
    fn=synthesize_speech,
    inputs=_input_widgets,
    outputs=gr.outputs.Audio(type="filepath"),
    title="Text-to-Speech Demo",
)
iface.launch()
|
output/.DS_Store
CHANGED
Binary files a/output/.DS_Store and b/output/.DS_Store differ
|
|
output/result/.DS_Store
CHANGED
Binary files a/output/result/.DS_Store and b/output/result/.DS_Store differ
|
|