File size: 3,094 Bytes
f0e07cf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
import streamlit as st
import asyncio
from quickstart import WebSocketHandler, AsyncHumeClient, ChatConnectOptions, MicrophoneInterface, SubscribeEvent
import os
from dotenv import load_dotenv
import chainlit as cl

# Page config
# NOTE(review): Streamlit page setup in a file whose handlers are all
# Chainlit decorators — presumably left over from an earlier Streamlit
# version of this demo; confirm which UI framework actually serves the app.
st.set_page_config(
    page_title="Hume.ai Voice Chat",
    page_icon="🎤",
    layout="centered"
)

st.title("Hume.ai Voice Chat Demo")

# Load environment variables
# Loads a local .env so run_chat() can read HUME_API_KEY, HUME_CONFIG_ID
# and HUME_SECRET_KEY via os.getenv.
load_dotenv()

async def run_chat():
    """Open a Hume EVI websocket and stream microphone audio until the mic task ends.

    Reads HUME_API_KEY / HUME_CONFIG_ID / HUME_SECRET_KEY from the environment
    (populated by load_dotenv at module import). Mirrors user/assistant chat
    events into the Chainlit UI, annotated with the top-3 prosody emotions
    when available. Stores the connection and socket in cl.user_session so
    the on_end handlers can tear them down.

    Raises:
        RuntimeError: if HUME_API_KEY is not set.
    """
    api_key = os.getenv("HUME_API_KEY")
    if not api_key:
        # Fail fast with a clear message instead of an opaque auth error later.
        raise RuntimeError("HUME_API_KEY is not set")

    client = AsyncHumeClient(api_key=api_key)
    options = ChatConnectOptions(
        config_id=os.getenv("HUME_CONFIG_ID"),
        secret_key=os.getenv("HUME_SECRET_KEY")
    )

    class ChainlitWebSocketHandler(WebSocketHandler):
        """WebSocketHandler that relays EVI chat events to the Chainlit UI."""

        async def on_message(self, message: SubscribeEvent):
            await super().on_message(message)

            # Only user/assistant chat messages are surfaced in the UI.
            if message.type not in ("user_message", "assistant_message"):
                return

            role = message.message.role
            message_text = message.message.content

            # Prosody scores only exist for audio-derived (non-text) messages.
            emotion_text = ""
            if message.from_text is False and hasattr(message, 'models') and hasattr(message.models, 'prosody'):
                scores = dict(message.models.prosody.scores)
                top_3_emotions = self._extract_top_n_emotions(scores, 3)
                emotion_text = " | ".join(
                    f"{emotion} ({score:.2f})" for emotion, score in top_3_emotions.items()
                )

            content = f"{message_text}\n\n*Emotions: {emotion_text}*" if emotion_text else message_text
            await cl.Message(
                content=content,
                author=role.capitalize()
            ).send()

    websocket_handler = ChainlitWebSocketHandler()

    connection = client.empathic_voice.chat.connect_with_callbacks(
        options=options,
        on_open=websocket_handler.on_open,
        on_message=websocket_handler.on_message,
        on_close=websocket_handler.on_close,
        on_error=websocket_handler.on_error
    )
    # Store the connection so on_end can call __aexit__ on it — previously
    # nothing ever set "hume_connection", making that cleanup a no-op.
    cl.user_session.set("hume_connection", connection)

    async with connection as socket:
        websocket_handler.set_socket(socket)
        cl.user_session.set("hume_socket", socket)

        # Stream microphone audio into the socket until the task completes.
        microphone_task = asyncio.create_task(
            MicrophoneInterface.start(
                socket,
                allow_user_interrupt=False,
                byte_stream=websocket_handler.byte_strs
            )
        )

        await microphone_task

@cl.on_chat_start
async def start():
    """Greet the user when a new Chainlit chat session begins."""
    # Original text "Click p to chat." was garbled; Chainlit's microphone
    # shortcut is the `P` key, so spell the instruction out.
    await cl.Message(
        content="Welcome to the Hume.ai Voice Chat Demo! Press `P` to start talking."
    ).send()

@cl.on_audio_chunk
@cl.on_audio_start
async def on_audio(*_args):
    """Start the Hume voice chat when audio capture begins.

    Registered for both on_audio_start and on_audio_chunk, so it accepts
    (and ignores) positional args: on_audio_chunk handlers are passed the
    audio chunk, and the original zero-arg signature would raise TypeError
    for every chunk. The session flag prevents each chunk from opening a
    fresh Hume connection while one is already running.
    """
    if cl.user_session.get("hume_chat_active"):
        return
    cl.user_session.set("hume_chat_active", True)
    try:
        await run_chat()
    finally:
        # Always clear the flag so a later audio session can start a new chat.
        cl.user_session.set("hume_chat_active", False)

@cl.on_audio_end
@cl.on_chat_end
@cl.on_stop
async def on_end():
    """Close any open Hume EVI connection and reset the session slots."""
    conn = cl.user_session.get("hume_connection")
    if conn:
        # The connection is an async context manager; exit it manually since
        # it was not entered via a `with` block in this handler.
        await conn.__aexit__(None, None, None)
    for slot in ("hume_socket", "hume_connection"):
        cl.user_session.set(slot, None)