Dirk Haupt
committed on
Commit
·
f0e07cf
1
Parent(s):
aa6b9fb
test if voice works on hf if microphone access is requested
Browse files- app.py +154 -47
- app_old.py +90 -0
app.py
CHANGED
@@ -4,6 +4,8 @@ from quickstart import WebSocketHandler, AsyncHumeClient, ChatConnectOptions, Mi
|
|
4 |
import os
|
5 |
from dotenv import load_dotenv
|
6 |
import chainlit as cl
|
|
|
|
|
7 |
|
8 |
# Page config
|
9 |
st.set_page_config(
|
@@ -17,64 +19,169 @@ st.title("Hume.ai Voice Chat Demo")
|
|
17 |
# Load environment variables
|
18 |
load_dotenv()
|
19 |
|
20 |
-
|
21 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
22 |
client = AsyncHumeClient(api_key=os.getenv("HUME_API_KEY"))
|
23 |
options = ChatConnectOptions(
|
24 |
config_id=os.getenv("HUME_CONFIG_ID"),
|
25 |
secret_key=os.getenv("HUME_SECRET_KEY")
|
26 |
)
|
27 |
|
28 |
-
# Create a custom WebSocketHandler that updates Chainlit
|
29 |
-
class ChainlitWebSocketHandler(WebSocketHandler):
|
30 |
-
async def on_message(self, message: SubscribeEvent):
|
31 |
-
await super().on_message(message)
|
32 |
-
|
33 |
-
if message.type in ["user_message", "assistant_message"]:
|
34 |
-
role = message.message.role
|
35 |
-
message_text = message.message.content
|
36 |
-
|
37 |
-
# Create emotion text if available
|
38 |
-
emotion_text = ""
|
39 |
-
if message.from_text is False and hasattr(message, 'models') and hasattr(message.models, 'prosody'):
|
40 |
-
scores = dict(message.models.prosody.scores)
|
41 |
-
top_3_emotions = self._extract_top_n_emotions(scores, 3)
|
42 |
-
emotion_text = " | ".join([f"{emotion} ({score:.2f})" for emotion, score in top_3_emotions.items()])
|
43 |
-
|
44 |
-
# Send message to Chainlit
|
45 |
-
content = f"{message_text}\n\n*Emotions: {emotion_text}*" if emotion_text else message_text
|
46 |
-
await cl.Message(
|
47 |
-
content=content,
|
48 |
-
author=role.capitalize()
|
49 |
-
).send()
|
50 |
-
|
51 |
websocket_handler = ChainlitWebSocketHandler()
|
|
|
52 |
|
53 |
-
async
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
70 |
|
71 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
72 |
|
73 |
@cl.on_chat_start
|
74 |
async def start():
|
75 |
-
await cl.Message(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
76 |
|
77 |
-
@cl.on_audio_chunk
|
78 |
@cl.on_audio_start
|
79 |
-
async def
|
80 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4 |
import os
|
5 |
from dotenv import load_dotenv
|
6 |
import chainlit as cl
|
7 |
+
# from uuid import uuid4
|
8 |
+
from chainlit.logger import logger
|
9 |
|
10 |
# Page config
|
11 |
st.set_page_config(
|
|
|
19 |
# Load environment variables
|
20 |
load_dotenv()
|
21 |
|
22 |
+
# Create a custom WebSocketHandler that updates Chainlit
|
23 |
+
class ChainlitWebSocketHandler(WebSocketHandler):
    """WebSocket handler that also echoes Hume chat messages into Chainlit.

    The base class's ``on_message`` is always awaited first; this subclass
    then posts user/assistant messages to the Chainlit UI, appending the
    emotion scores returned by ``_extract_top_n_emotions`` when prosody
    data is present.
    """

    async def on_message(self, message: SubscribeEvent):
        """Run the base handler, then mirror chat text into Chainlit.

        Args:
            message: Event received from the Hume socket. Only events whose
                ``type`` is ``"user_message"`` or ``"assistant_message"``
                produce a Chainlit message; every other event is handled
                solely by the base class.
        """
        await super().on_message(message)

        if message.type not in ("user_message", "assistant_message"):
            return

        role = message.message.role
        message_text = message.message.content

        # An attribute can be present and still be None, so test the value
        # itself instead of relying on hasattr() before reading `.scores`.
        prosody = getattr(getattr(message, "models", None), "prosody", None)

        emotion_text = ""
        if message.from_text is False and prosody is not None:
            top_3_emotions = self._extract_top_n_emotions(dict(prosody.scores), 3)
            emotion_text = " | ".join(
                f"{emotion} ({score:.2f})"
                for emotion, score in top_3_emotions.items()
            )

        # Send message to Chainlit, with the emotion line only when we have one.
        content = (
            f"{message_text}\n\n*Emotions: {emotion_text}*"
            if emotion_text
            else message_text
        )
        await cl.Message(
            content=content,
            author=role.capitalize(),
        ).send()
|
44 |
+
|
45 |
+
|
46 |
+
async def setup_hume_realtime():
    """Create the Hume client objects and store them in the Chainlit session.

    The API key, config id and secret key are read from the
    ``HUME_API_KEY``, ``HUME_CONFIG_ID`` and ``HUME_SECRET_KEY`` environment
    variables. This function makes no connect call; it only stores the
    handler, client and options under the ``hume_websocket_handler``,
    ``hume_client`` and ``hume_options`` session keys.
    """
    read_env = os.getenv

    hume_client = AsyncHumeClient(api_key=read_env("HUME_API_KEY"))
    connect_options = ChatConnectOptions(
        config_id=read_env("HUME_CONFIG_ID"),
        secret_key=read_env("HUME_SECRET_KEY"),
    )
    handler = ChainlitWebSocketHandler()

    # NOTE: an earlier experiment replaced the handler's on_message/on_error
    # with custom callbacks (audio-output streaming to the browser, playback
    # interrupt via a per-session track id, error reporting). It was disabled
    # and the handler's own methods are used as-is.

    # Store everything in the session so the audio hooks can pick it up.
    session_entries = {
        "hume_websocket_handler": handler,
        "hume_client": hume_client,
        "hume_options": connect_options,
    }
    for session_key, session_value in session_entries.items():
        cl.user_session.set(session_key, session_value)
|
105 |
+
|
106 |
|
107 |
@cl.on_chat_start
async def start():
    """Send the welcome message, then prepare the Hume objects for the session."""
    greeting = cl.Message(
        content="Welcome to the Chainlit x Hume.ai realtime example. Press `P` to talk!"
    )
    await greeting.send()
    await setup_hume_realtime()
|
113 |
+
|
114 |
+
# @cl.on_message
|
115 |
+
# async def on_message(message: cl.Message):
|
116 |
+
# socket = cl.user_session.get("hume_socket")
|
117 |
+
# if socket and socket.is_connected():
|
118 |
+
# await socket.send_user_input(message.content)
|
119 |
+
# else:
|
120 |
+
# await cl.Message(content="Please activate voice mode before sending messages!").send()
|
121 |
|
|
|
122 |
@cl.on_audio_start
async def on_audio_start():
    """Open the Hume chat connection when an audio session starts.

    Reads the client, options and handler stored in the Chainlit session,
    opens the connection, and stores the socket and its context manager
    under the ``hume_socket`` and ``hume_connection`` session keys.

    Returns:
        True when the connection was opened and stored; False when anything
        failed, in which case an error message is sent to the UI.
    """
    connection = None
    connection_entered = False
    try:
        client = cl.user_session.get("hume_client")
        options = cl.user_session.get("hume_options")
        websocket_handler = cl.user_session.get("hume_websocket_handler")

        if not all([client, options, websocket_handler]):
            raise RuntimeError("Hume.ai client not properly initialized!")

        # The connection has to outlive this handler, so the context manager
        # is entered by hand here and exited later via the stored object.
        connection = client.empathic_voice.chat.connect_with_callbacks(
            options=options,
            on_open=websocket_handler.on_open,
            on_message=websocket_handler.on_message,
            on_close=websocket_handler.on_close,
            on_error=websocket_handler.on_error,
        )
        socket = await connection.__aenter__()
        connection_entered = True

        websocket_handler.set_socket(socket)
        cl.user_session.set("hume_socket", socket)
        # Store the connection context manager to close it properly later
        cl.user_session.set("hume_connection", connection)
        logger.info("Connected to Hume.ai realtime")
        return True
    except Exception as e:
        logger.exception("Failed to connect to Hume.ai realtime")
        if connection_entered:
            # Do not leak an opened connection when a later step failed.
            try:
                await connection.__aexit__(None, None, None)
            except Exception:
                logger.exception("Failed to close the Hume.ai connection after a setup error")
            cl.user_session.set("hume_socket", None)
            cl.user_session.set("hume_connection", None)
        await cl.ErrorMessage(content=f"Failed to connect to Hume.ai realtime: {e}").send()
        return False
|
153 |
+
|
154 |
+
def _log_microphone_task_result(task):
    """Log the exception of a finished microphone task, if it has one."""
    if task.cancelled():
        return
    error = task.exception()
    if error is not None:
        logger.error("Hume.ai microphone task failed: %s", error)


@cl.on_audio_chunk
async def on_audio_chunk(chunk: cl.InputAudioChunk):
    """Make sure the microphone streaming task is running while audio arrives.

    Args:
        chunk: Audio chunk delivered by Chainlit.
            NOTE(review): the chunk is not forwarded anywhere; presumably
            ``MicrophoneInterface`` captures audio from a device on the
            machine running this code. Confirm this works when the app is
            hosted remotely.
    """
    socket = cl.user_session.get("hume_socket")
    websocket_handler = cl.user_session.get("hume_websocket_handler")
    if not (socket and websocket_handler):
        logger.info("Hume.ai socket is not connected")
        return

    # Nothing to do while a previously started task is still running.
    existing_task = getattr(websocket_handler, "microphone_task", None)
    if existing_task is not None and not existing_task.done():
        return

    # Start microphone interface if not already started (or if the previous
    # task has finished, e.g. after the connection was closed and reopened).
    # The task is deliberately NOT awaited here: it runs for the whole
    # conversation, and awaiting it would leave one suspended handler per
    # incoming chunk.
    microphone_task = asyncio.create_task(
        MicrophoneInterface.start(
            socket,
            allow_user_interrupt=True,
            byte_stream=websocket_handler.byte_strs,
        )
    )
    # Without an awaiter, failures would be silent; report them explicitly.
    microphone_task.add_done_callback(_log_microphone_task_result)
    websocket_handler.microphone_task = microphone_task
|
178 |
+
|
179 |
+
@cl.on_audio_end
@cl.on_chat_end
@cl.on_stop
async def on_end():
    """Stop microphone streaming and close the Hume connection, if any.

    Registered for the audio-end, chat-end and stop hooks, so it may run
    more than once per session; each step is skipped when there is nothing
    left to clean up.
    """
    # Cancel the microphone task and drop the attribute so a later audio
    # session starts a fresh task instead of reusing a finished one.
    websocket_handler = cl.user_session.get("hume_websocket_handler")
    microphone_task = getattr(websocket_handler, "microphone_task", None)
    if microphone_task is not None:
        microphone_task.cancel()
        del websocket_handler.microphone_task

    connection = cl.user_session.get("hume_connection")
    if not connection:
        return

    try:
        await connection.__aexit__(None, None, None)
    finally:
        # Clear the session even when closing raised, so no stale socket or
        # connection object is picked up by later handlers.
        cl.user_session.set("hume_socket", None)
        cl.user_session.set("hume_connection", None)
|
app_old.py
ADDED
@@ -0,0 +1,90 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import asyncio
|
3 |
+
from quickstart import WebSocketHandler, AsyncHumeClient, ChatConnectOptions, MicrophoneInterface, SubscribeEvent
|
4 |
+
import os
|
5 |
+
from dotenv import load_dotenv
|
6 |
+
import chainlit as cl
|
7 |
+
|
8 |
+
# Page config
|
9 |
+
# Streamlit page settings: centered layout, microphone icon, page title.
st.set_page_config(
    layout="centered",
    page_icon="🎤",
    page_title="Hume.ai Voice Chat",
)

# Heading shown at the top of the page.
st.title("Hume.ai Voice Chat Demo")

# Load environment variables
load_dotenv()
|
19 |
+
|
20 |
+
async def run_chat():
    """Open a Hume chat connection and run the microphone interface on it.

    Builds the client and connect options from the ``HUME_API_KEY``,
    ``HUME_CONFIG_ID`` and ``HUME_SECRET_KEY`` environment variables, opens
    the connection with a Chainlit-aware handler, and waits until the
    microphone task finishes. The connection is closed when the
    ``async with`` block exits.
    """
    # Initialize client and handlers
    client = AsyncHumeClient(api_key=os.getenv("HUME_API_KEY"))
    options = ChatConnectOptions(
        config_id=os.getenv("HUME_CONFIG_ID"),
        secret_key=os.getenv("HUME_SECRET_KEY"),
    )

    # Create a custom WebSocketHandler that updates Chainlit
    class ChainlitWebSocketHandler(WebSocketHandler):
        """Handler that also echoes user/assistant messages into Chainlit."""

        async def on_message(self, message: SubscribeEvent):
            """Run the base handler, then mirror chat text into Chainlit."""
            await super().on_message(message)

            if message.type not in ("user_message", "assistant_message"):
                return

            role = message.message.role
            message_text = message.message.content

            # An attribute can be present and still be None, so test the
            # value itself instead of relying on hasattr().
            prosody = getattr(getattr(message, "models", None), "prosody", None)

            # Create emotion text if available
            emotion_text = ""
            if message.from_text is False and prosody is not None:
                top_3_emotions = self._extract_top_n_emotions(dict(prosody.scores), 3)
                emotion_text = " | ".join(
                    f"{emotion} ({score:.2f})"
                    for emotion, score in top_3_emotions.items()
                )

            # Send message to Chainlit
            content = (
                f"{message_text}\n\n*Emotions: {emotion_text}*"
                if emotion_text
                else message_text
            )
            await cl.Message(
                content=content,
                author=role.capitalize(),
            ).send()

    websocket_handler = ChainlitWebSocketHandler()

    async with client.empathic_voice.chat.connect_with_callbacks(
        options=options,
        on_open=websocket_handler.on_open,
        on_message=websocket_handler.on_message,
        on_close=websocket_handler.on_close,
        on_error=websocket_handler.on_error,
    ) as socket:
        websocket_handler.set_socket(socket)

        # Create microphone interface task
        microphone_task = asyncio.create_task(
            MicrophoneInterface.start(
                socket,
                allow_user_interrupt=False,
                byte_stream=websocket_handler.byte_strs,
            )
        )

        await microphone_task
|
72 |
+
|
73 |
+
@cl.on_chat_start
async def start():
    """Send the welcome message when a chat session begins."""
    welcome = cl.Message(
        content="Welcome to the Hume.ai Voice Chat Demo! Click p to chat."
    )
    await welcome.send()
|
76 |
+
|
77 |
+
@cl.on_audio_chunk
@cl.on_audio_start
async def on_audio():
    """Run the Hume chat loop for both the audio-start and audio-chunk hooks.

    NOTE(review): because this is also registered for audio chunks, every
    chunk appears to start another ``run_chat()`` call - confirm this is
    intended.
    """
    await run_chat()
|
81 |
+
|
82 |
+
@cl.on_audio_end
@cl.on_chat_end
@cl.on_stop
async def on_end():
    """Close the connection stored in the session, if one is stored.

    Registered for the audio-end, chat-end and stop hooks. Does nothing
    when no ``hume_connection`` entry is present in the session.
    """
    stored_connection = cl.user_session.get("hume_connection")
    if not stored_connection:
        return

    await stored_connection.__aexit__(None, None, None)

    # Reset both session entries after the connection has been exited.
    for session_key in ("hume_socket", "hume_connection"):
        cl.user_session.set(session_key, None)
|