Dirk Haupt committed on
Commit
f0e07cf
·
1 Parent(s): aa6b9fb

test if voice works on hf if microphone access is requested

Browse files
Files changed (2) hide show
  1. app.py +154 -47
  2. app_old.py +90 -0
app.py CHANGED
@@ -4,6 +4,8 @@ from quickstart import WebSocketHandler, AsyncHumeClient, ChatConnectOptions, Mi
4
  import os
5
  from dotenv import load_dotenv
6
  import chainlit as cl
 
 
7
 
8
  # Page config
9
  st.set_page_config(
@@ -17,64 +19,169 @@ st.title("Hume.ai Voice Chat Demo")
17
  # Load environment variables
18
  load_dotenv()
19
 
20
- async def run_chat():
21
- # Initialize client and handlers
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
  client = AsyncHumeClient(api_key=os.getenv("HUME_API_KEY"))
23
  options = ChatConnectOptions(
24
  config_id=os.getenv("HUME_CONFIG_ID"),
25
  secret_key=os.getenv("HUME_SECRET_KEY")
26
  )
27
 
28
- # Create a custom WebSocketHandler that updates Chainlit
29
- class ChainlitWebSocketHandler(WebSocketHandler):
30
- async def on_message(self, message: SubscribeEvent):
31
- await super().on_message(message)
32
-
33
- if message.type in ["user_message", "assistant_message"]:
34
- role = message.message.role
35
- message_text = message.message.content
36
-
37
- # Create emotion text if available
38
- emotion_text = ""
39
- if message.from_text is False and hasattr(message, 'models') and hasattr(message.models, 'prosody'):
40
- scores = dict(message.models.prosody.scores)
41
- top_3_emotions = self._extract_top_n_emotions(scores, 3)
42
- emotion_text = " | ".join([f"{emotion} ({score:.2f})" for emotion, score in top_3_emotions.items()])
43
-
44
- # Send message to Chainlit
45
- content = f"{message_text}\n\n*Emotions: {emotion_text}*" if emotion_text else message_text
46
- await cl.Message(
47
- content=content,
48
- author=role.capitalize()
49
- ).send()
50
-
51
  websocket_handler = ChainlitWebSocketHandler()
 
52
 
53
- async with client.empathic_voice.chat.connect_with_callbacks(
54
- options=options,
55
- on_open=websocket_handler.on_open,
56
- on_message=websocket_handler.on_message,
57
- on_close=websocket_handler.on_close,
58
- on_error=websocket_handler.on_error
59
- ) as socket:
60
- websocket_handler.set_socket(socket)
61
-
62
- # Create microphone interface task
63
- microphone_task = asyncio.create_task(
64
- MicrophoneInterface.start(
65
- socket,
66
- allow_user_interrupt=False,
67
- byte_stream=websocket_handler.byte_strs
68
- )
69
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
70
 
71
- await microphone_task
 
 
 
 
 
 
 
 
 
 
 
 
72
 
73
  @cl.on_chat_start
74
  async def start():
75
- await cl.Message(content="Welcome to the Hume.ai Voice Chat Demo! Click p to chat.").send()
 
 
 
 
 
 
 
 
 
 
 
76
 
77
- @cl.on_audio_chunk
78
  @cl.on_audio_start
79
- async def on_audio():
80
- await run_chat()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
  import os
5
  from dotenv import load_dotenv
6
  import chainlit as cl
7
+ # from uuid import uuid4
8
+ from chainlit.logger import logger
9
 
10
  # Page config
11
  st.set_page_config(
 
19
  # Load environment variables
20
  load_dotenv()
21
 
22
+ # Create a custom WebSocketHandler that updates Chainlit
23
+ class ChainlitWebSocketHandler(WebSocketHandler):
24
+ async def on_message(self, message: SubscribeEvent):
25
+ await super().on_message(message)
26
+
27
+ if message.type in ["user_message", "assistant_message"]:
28
+ role = message.message.role
29
+ message_text = message.message.content
30
+
31
+ # Create emotion text if available
32
+ emotion_text = ""
33
+ if message.from_text is False and hasattr(message, 'models') and hasattr(message.models, 'prosody'):
34
+ scores = dict(message.models.prosody.scores)
35
+ top_3_emotions = self._extract_top_n_emotions(scores, 3)
36
+ emotion_text = " | ".join([f"{emotion} ({score:.2f})" for emotion, score in top_3_emotions.items()])
37
+
38
+ # Send message to Chainlit
39
+ content = f"{message_text}\n\n*Emotions: {emotion_text}*" if emotion_text else message_text
40
+ await cl.Message(
41
+ content=content,
42
+ author=role.capitalize()
43
+ ).send()
44
+
45
+
46
+ async def setup_hume_realtime():
47
+ """Instantiate and configure the Hume Realtime Client"""
48
  client = AsyncHumeClient(api_key=os.getenv("HUME_API_KEY"))
49
  options = ChatConnectOptions(
50
  config_id=os.getenv("HUME_CONFIG_ID"),
51
  secret_key=os.getenv("HUME_SECRET_KEY")
52
  )
53
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54
  websocket_handler = ChainlitWebSocketHandler()
55
+ # cl.user_session.set("track_id", str(uuid4()))
56
 
57
+ # async def handle_conversation_updated(message):
58
+ # """Currently used to stream responses back to the client."""
59
+ # if message.type == "audio_output":
60
+ # # Handle audio streaming
61
+ # message_bytes = base64.b64decode(message.data.encode("utf-8"))
62
+ # await cl.context.emitter.send_audio_chunk(
63
+ # cl.OutputAudioChunk(
64
+ # mimeType="pcm16",
65
+ # data=message_bytes,
66
+ # track=cl.user_session.get("track_id")
67
+ # )
68
+ # )
69
+ # elif message.type in ["user_message", "assistant_message"]:
70
+ # # Handle text and emotion data
71
+ # role = message.message.role
72
+ # message_text = message.message.content
73
+
74
+ # # Create emotion text if available
75
+ # emotion_text = ""
76
+ # if message.from_text is False and hasattr(message, 'models') and hasattr(message.models, 'prosody'):
77
+ # scores = dict(message.models.prosody.scores)
78
+ # top_3_emotions = websocket_handler._extract_top_n_emotions(scores, 3)
79
+ # emotion_text = " | ".join([f"{emotion} ({score:.2f})" for emotion, score in top_3_emotions.items()])
80
+
81
+ # # Send message to Chainlit
82
+ # content = f"{message_text}\n\n*Emotions: {emotion_text}*" if emotion_text else message_text
83
+ # await cl.Message(
84
+ # content=content,
85
+ # author=role.capitalize()
86
+ # ).send()
87
+
88
+ # async def handle_conversation_interrupt(event):
89
+ # """Used to cancel the client previous audio playback."""
90
+ # cl.user_session.set("track_id", str(uuid4()))
91
+ # await cl.context.emitter.send_audio_interrupt()
92
 
93
+ # async def handle_error(error):
94
+ # logger.error(error)
95
+ # await cl.Message(content=f"Error: {str(error)}").send()
96
+
97
+ # Override the websocket handler's methods
98
+ # websocket_handler.on_message = handle_conversation_updated
99
+ # websocket_handler.on_error = handle_error
100
+
101
+ # Store the handler in the session
102
+ cl.user_session.set("hume_websocket_handler", websocket_handler)
103
+ cl.user_session.set("hume_client", client)
104
+ cl.user_session.set("hume_options", options)
105
+
106
 
107
  @cl.on_chat_start
108
  async def start():
109
+ await cl.Message(
110
+ content="Welcome to the Chainlit x Hume.ai realtime example. Press `P` to talk!"
111
+ ).send()
112
+ await setup_hume_realtime()
113
+
114
+ # @cl.on_message
115
+ # async def on_message(message: cl.Message):
116
+ # socket = cl.user_session.get("hume_socket")
117
+ # if socket and socket.is_connected():
118
+ # await socket.send_user_input(message.content)
119
+ # else:
120
+ # await cl.Message(content="Please activate voice mode before sending messages!").send()
121
 
 
122
  @cl.on_audio_start
123
+ async def on_audio_start():
124
+ try:
125
+ client = cl.user_session.get("hume_client")
126
+ options = cl.user_session.get("hume_options")
127
+ websocket_handler = cl.user_session.get("hume_websocket_handler")
128
+
129
+ if not all([client, options, websocket_handler]):
130
+ raise Exception("Hume.ai client not properly initialized!")
131
+
132
+ # Create a new context manager
133
+ connection = client.empathic_voice.chat.connect_with_callbacks(
134
+ options=options,
135
+ on_open=websocket_handler.on_open,
136
+ on_message=websocket_handler.on_message,
137
+ on_close=websocket_handler.on_close,
138
+ on_error=websocket_handler.on_error
139
+ )
140
+
141
+ # Enter the context manager
142
+ socket = await connection.__aenter__()
143
+
144
+ websocket_handler.set_socket(socket)
145
+ cl.user_session.set("hume_socket", socket)
146
+ # Store the connection context manager to close it properly later
147
+ cl.user_session.set("hume_connection", connection)
148
+ logger.info("Connected to Hume.ai realtime")
149
+ return True
150
+ except Exception as e:
151
+ await cl.ErrorMessage(content=f"Failed to connect to Hume.ai realtime: {e}").send()
152
+ return False
153
+
154
+ @cl.on_audio_chunk
155
+ async def on_audio_chunk(chunk: cl.InputAudioChunk):
156
+ socket = cl.user_session.get("hume_socket")
157
+ websocket_handler = cl.user_session.get("hume_websocket_handler")
158
+ if socket and websocket_handler:
159
+ # # Get or create byte stream
160
+ # if not hasattr(websocket_handler, "byte_stream"):
161
+ # websocket_handler.byte_stream = websocket_handler.byte_strs
162
+
163
+ # Start microphone interface if not already started
164
+ if not hasattr(websocket_handler, "microphone_task"):
165
+ websocket_handler.microphone_task = asyncio.create_task(
166
+ MicrophoneInterface.start(
167
+ socket,
168
+ allow_user_interrupt=True,
169
+ byte_stream=websocket_handler.byte_strs,
170
+ )
171
+ )
172
+ await websocket_handler.microphone_task
173
+
174
+ # Send audio chunk to the byte stream
175
+ # await websocket_handler.byte_stream.put(chunk.data)
176
+ else:
177
+ logger.info("Hume.ai socket is not connected")
178
+
179
+ @cl.on_audio_end
180
+ @cl.on_chat_end
181
+ @cl.on_stop
182
+ async def on_end():
183
+ connection = cl.user_session.get("hume_connection")
184
+ if connection:
185
+ await connection.__aexit__(None, None, None)
186
+ cl.user_session.set("hume_socket", None)
187
+ cl.user_session.set("hume_connection", None)
app_old.py ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import asyncio
3
+ from quickstart import WebSocketHandler, AsyncHumeClient, ChatConnectOptions, MicrophoneInterface, SubscribeEvent
4
+ import os
5
+ from dotenv import load_dotenv
6
+ import chainlit as cl
7
+
8
+ # Page config
9
+ st.set_page_config(
10
+ page_title="Hume.ai Voice Chat",
11
+ page_icon="🎤",
12
+ layout="centered"
13
+ )
14
+
15
+ st.title("Hume.ai Voice Chat Demo")
16
+
17
+ # Load environment variables
18
+ load_dotenv()
19
+
20
+ async def run_chat():
21
+ # Initialize client and handlers
22
+ client = AsyncHumeClient(api_key=os.getenv("HUME_API_KEY"))
23
+ options = ChatConnectOptions(
24
+ config_id=os.getenv("HUME_CONFIG_ID"),
25
+ secret_key=os.getenv("HUME_SECRET_KEY")
26
+ )
27
+
28
+ # Create a custom WebSocketHandler that updates Chainlit
29
+ class ChainlitWebSocketHandler(WebSocketHandler):
30
+ async def on_message(self, message: SubscribeEvent):
31
+ await super().on_message(message)
32
+
33
+ if message.type in ["user_message", "assistant_message"]:
34
+ role = message.message.role
35
+ message_text = message.message.content
36
+
37
+ # Create emotion text if available
38
+ emotion_text = ""
39
+ if message.from_text is False and hasattr(message, 'models') and hasattr(message.models, 'prosody'):
40
+ scores = dict(message.models.prosody.scores)
41
+ top_3_emotions = self._extract_top_n_emotions(scores, 3)
42
+ emotion_text = " | ".join([f"{emotion} ({score:.2f})" for emotion, score in top_3_emotions.items()])
43
+
44
+ # Send message to Chainlit
45
+ content = f"{message_text}\n\n*Emotions: {emotion_text}*" if emotion_text else message_text
46
+ await cl.Message(
47
+ content=content,
48
+ author=role.capitalize()
49
+ ).send()
50
+
51
+ websocket_handler = ChainlitWebSocketHandler()
52
+
53
+ async with client.empathic_voice.chat.connect_with_callbacks(
54
+ options=options,
55
+ on_open=websocket_handler.on_open,
56
+ on_message=websocket_handler.on_message,
57
+ on_close=websocket_handler.on_close,
58
+ on_error=websocket_handler.on_error
59
+ ) as socket:
60
+ websocket_handler.set_socket(socket)
61
+
62
+ # Create microphone interface task
63
+ microphone_task = asyncio.create_task(
64
+ MicrophoneInterface.start(
65
+ socket,
66
+ allow_user_interrupt=False,
67
+ byte_stream=websocket_handler.byte_strs
68
+ )
69
+ )
70
+
71
+ await microphone_task
72
+
73
+ @cl.on_chat_start
74
+ async def start():
75
+ await cl.Message(content="Welcome to the Hume.ai Voice Chat Demo! Click p to chat.").send()
76
+
77
+ @cl.on_audio_chunk
78
+ @cl.on_audio_start
79
+ async def on_audio():
80
+ await run_chat()
81
+
82
+ @cl.on_audio_end
83
+ @cl.on_chat_end
84
+ @cl.on_stop
85
+ async def on_end():
86
+ connection = cl.user_session.get("hume_connection")
87
+ if connection:
88
+ await connection.__aexit__(None, None, None)
89
+ cl.user_session.set("hume_socket", None)
90
+ cl.user_session.set("hume_connection", None)