Spaces:
Paused
Paused
Commit
·
4e0ef54
1
Parent(s):
3855dca
audio output bug fixing
Browse files
app.py
CHANGED
@@ -7,9 +7,9 @@ import azure.cognitiveservices.speech as speechsdk
|
|
7 |
openai.api_key = os.environ['OPEN_AI_KEY']
|
8 |
|
9 |
speech_config = speechsdk.SpeechConfig(subscription=os.environ['AZURE_SPEECH_KEY'], region="westeurope")
|
10 |
-
audio_config = speechsdk.audio.AudioOutputConfig(use_default_speaker=True)
|
11 |
speech_config.speech_synthesis_voice_name = "en-US-AriaNeural"
|
12 |
-
speech_synthesizer = speechsdk.SpeechSynthesizer(speech_config=speech_config, audio_config=
|
13 |
|
14 |
messages = [{"role": "system", "content": 'You are a clever and helpfull conversationalist. Respond to all input in 25 words or less. Do not mention that you are an AI or service of any kind.'}]
|
15 |
|
@@ -33,7 +33,8 @@ def transcribe(audio: str):
|
|
33 |
# engine.say(system_message['content'])
|
34 |
# engine.runAndWait()
|
35 |
|
36 |
-
|
|
|
37 |
|
38 |
# Checks result.
|
39 |
# if result.reason == speechsdk.ResultReason.SynthesizingAudioCompleted:
|
@@ -51,7 +52,7 @@ def transcribe(audio: str):
|
|
51 |
if message['role'] != 'system':
|
52 |
chat_transcript += message['role'] + ": " + message['content'] + "\n\n"
|
53 |
|
54 |
-
return chat_transcript
|
55 |
|
56 |
# set a custom theme
|
57 |
theme = gr.themes.Default().set(
|
@@ -65,9 +66,9 @@ with gr.Blocks(theme=theme) as ui:
|
|
65 |
|
66 |
# text transcript output and audio
|
67 |
text_output = gr.Textbox(label="Conversation Transcript")
|
68 |
-
|
69 |
|
70 |
btn = gr.Button("Run")
|
71 |
-
btn.click(fn=transcribe, inputs=audio_input, outputs=[text_output])
|
72 |
|
73 |
ui.launch()
|
|
|
7 |
openai.api_key = os.environ['OPEN_AI_KEY']
|
8 |
|
9 |
speech_config = speechsdk.SpeechConfig(subscription=os.environ['AZURE_SPEECH_KEY'], region="westeurope")
|
10 |
+
#audio_config = speechsdk.audio.AudioOutputConfig(use_default_speaker=True)
|
11 |
speech_config.speech_synthesis_voice_name = "en-US-AriaNeural"
|
12 |
+
speech_synthesizer = speechsdk.SpeechSynthesizer(speech_config=speech_config, audio_config=None)
|
13 |
|
14 |
messages = [{"role": "system", "content": 'You are a clever and helpfull conversationalist. Respond to all input in 25 words or less. Do not mention that you are an AI or service of any kind.'}]
|
15 |
|
|
|
33 |
# engine.say(system_message['content'])
|
34 |
# engine.runAndWait()
|
35 |
|
36 |
+
voice_reponse = speech_synthesizer.speak_text_async(system_message['content']).get()
|
37 |
+
stream = speechsdk.AudioDataStream(voice_reponse)
|
38 |
|
39 |
# Checks result.
|
40 |
# if result.reason == speechsdk.ResultReason.SynthesizingAudioCompleted:
|
|
|
52 |
if message['role'] != 'system':
|
53 |
chat_transcript += message['role'] + ": " + message['content'] + "\n\n"
|
54 |
|
55 |
+
return chat_transcript, voice_reponse
|
56 |
|
57 |
# set a custom theme
|
58 |
theme = gr.themes.Default().set(
|
|
|
66 |
|
67 |
# text transcript output and audio
|
68 |
text_output = gr.Textbox(label="Conversation Transcript")
|
69 |
+
audio_output = gr.Audio()
|
70 |
|
71 |
btn = gr.Button("Run")
|
72 |
+
btn.click(fn=transcribe, inputs=audio_input, outputs=[text_output, audio_output])
|
73 |
|
74 |
ui.launch()
|