Spaces:
Paused
Paused
Commit
·
d086a4b
1
Parent(s):
1b653d3
Custom voice + Gradio improvements
Browse files- TARS-discourse.py +41 -5
TARS-discourse.py
CHANGED
@@ -3,9 +3,15 @@ import openai
|
|
3 |
import config
|
4 |
import os
|
5 |
import pyttsx3
|
|
|
|
|
6 |
openai.api_key = config.OPENAI_API_KEY
|
7 |
|
8 |
-
|
|
|
|
|
|
|
|
|
9 |
|
10 |
def transcribe(audio: str):
|
11 |
global messages
|
@@ -23,9 +29,22 @@ def transcribe(audio: str):
|
|
23 |
system_message = response["choices"][0]["message"]
|
24 |
messages.append(system_message)
|
25 |
|
26 |
-
engine = pyttsx3.init()
|
27 |
-
engine.say(system_message['content'])
|
28 |
-
engine.runAndWait()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
29 |
|
30 |
chat_transcript = ""
|
31 |
for message in messages:
|
@@ -34,4 +53,21 @@ def transcribe(audio: str):
|
|
34 |
|
35 |
return chat_transcript
|
36 |
|
37 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3 |
import config
|
4 |
import os
|
5 |
import pyttsx3
|
6 |
+
import azure.cognitiveservices.speech as speechsdk
|
7 |
+
|
8 |
# Authenticate the OpenAI client with the key kept in the local config module.
openai.api_key = config.OPENAI_API_KEY

# Azure text-to-speech setup: the synthesizer is created once at import time
# and reused by transcribe() for every reply.
speech_config = speechsdk.SpeechConfig(
    subscription=config.AZURE_VOICE_KEY,
    region=config.AZURE_VOICE_LOCATION,
)
speech_config.speech_synthesis_voice_name = "en-US-AriaNeural"
speech_synthesizer = speechsdk.SpeechSynthesizer(speech_config=speech_config)

# Conversation history shared across calls; transcribe() appends to it.
# It starts with the system prompt that sets the assistant's persona.
# NOTE(review): "helpfull" is misspelled in the prompt; it is runtime text sent
# to the model, so it is left unchanged here -- confirm before correcting.
messages = [
    {
        "role": "system",
        "content": 'You are a clever and helpfull conversationalist. Respond to all input in 25 words or less. Do not mention that you are an AI or service of any kind.',
    },
]
|
15 |
|
16 |
def transcribe(audio: str):
|
17 |
global messages
|
|
|
29 |
system_message = response["choices"][0]["message"]
|
30 |
messages.append(system_message)
|
31 |
|
32 |
+
# engine = pyttsx3.init()
|
33 |
+
# engine.say(system_message['content'])
|
34 |
+
# engine.runAndWait()
|
35 |
+
|
36 |
+
result = speech_synthesizer.speak_text_async(system_message['content']).get()
|
37 |
+
|
38 |
+
# Checks result.
|
39 |
+
# if result.reason == speechsdk.ResultReason.SynthesizingAudioCompleted:
|
40 |
+
# print("Speech synthesized to speaker for text [{}]".format(text))
|
41 |
+
# elif result.reason == speechsdk.ResultReason.Canceled:
|
42 |
+
# cancellation_details = result.cancellation_details
|
43 |
+
# print("Speech synthesis canceled: {}".format(cancellation_details.reason))
|
44 |
+
# if cancellation_details.reason == speechsdk.CancellationReason.Error:
|
45 |
+
# if cancellation_details.error_details:
|
46 |
+
# print("Error details: {}".format(cancellation_details.error_details))
|
47 |
+
# print("Did you update the subscription info?")
|
48 |
|
49 |
chat_transcript = ""
|
50 |
for message in messages:
|
|
|
53 |
|
54 |
return chat_transcript
|
55 |
|
56 |
+
# Build the Gradio interface: a black-background page showing the advisor
# image, a microphone recorder, the conversation transcript, and a Run button.
theme = gr.themes.Default().set(
    body_background_fill="#000000",
)

with gr.Blocks(theme=theme) as ui:
    # Advisor image and microphone input. The image path and dimensions come
    # from the local config module.
    advisor = gr.Image(value=config.ADVISOR_IMAGE).style(
        width=config.ADVISOR_IMAGE_WIDTH,
        height=config.ADVISOR_IMAGE_HEIGHT,
    )
    audio_input = gr.Audio(source="microphone", type="filepath")

    # Text transcript output and an audio player.
    text_output = gr.Textbox(label="Conversation Transcript")
    # Kept in the layout but intentionally not wired to the click handler:
    # transcribe() does not return audio data for it (see below).
    audio_output = gr.Audio()

    btn = gr.Button("Run")
    # transcribe() returns a single value (the transcript string), so exactly
    # one output component may be listed. Listing two made Gradio expect a
    # 2-tuple and fail (or mis-assign the string) when the button was clicked.
    btn.click(fn=transcribe, inputs=audio_input, outputs=text_output)

ui.launch(debug=True, share=True)
|