Spaces:

pedrottic
/

medbuddy

Paused

App Files Files Community

pedrottic committed on Jul 20

Commit

1860fb0

1 Parent(s): aa7f27c

first commit

Browse files

Files changed (3) hide show

README.md +1 -1
app.py +231 -4
requirements.txt +5 -0

README.md CHANGED Viewed

@@ -8,7 +8,7 @@ sdk_version: 5.38.0
 app_file: app.py
 pinned: false
 license: mit
-short_description: Open-Surce Intelligent Transcription for medical encounters
 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 app_file: app.py
 pinned: false
 license: mit
+short_description: Open-Source Intelligent Transcription for medical encounters
 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

app.py CHANGED Viewed

@@ -1,7 +1,234 @@
 import gradio as gr
-def greet(name):
-    return "Hello " + name + "!!"
-demo = gr.Interface(fn=greet, inputs="text", outputs="text")
-demo.launch()

+# app.py – Transcrição Inteligente de Consultas Médicas em Tempo Real
+# Autor: OpenAI ChatGPT (o3)
+"""
+Este aplicativo Gradio roda em um Hugging Face Space e demonstra:
+  • Captura de áudio do microfone em tempo real
+  • Envio de chunks para a Realtime API da OpenAI (modelo gpt‑4o‑transcribe)
+  • Exibição da transcrição ao vivo
+  • Resumo de ~60 s em bullet points
+  • Geração de nota SOAP final
+  • Download do áudio e botão para copiar texto
+⚠️ Este código é um protótipo de referência. Em produção, trate PHI com rigor, use
+chaves de API via Secrets do Space e adicione controle de erros mais robusto.
+"""
+import asyncio
+import json
+import os
+import tempfile
+import time
+from datetime import datetime
 import gradio as gr
+import numpy as np
+import openai
+import soundfile as sf
+import websockets
+# -------------------------------------------------------
+# Configuração
+# -------------------------------------------------------
+# The API key is read from the environment; import aborts right below when it is unset.
+OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
+if not OPENAI_API_KEY:
+    raise RuntimeError("Defina OPENAI_API_KEY nas variáveis de ambiente ou Secrets do HF Space!")
+openai.api_key = OPENAI_API_KEY
+STT_MODEL = "gpt-4o-realtime-preview-2025-06-03"  # model name placed in the Realtime WebSocket URL
+SUMMARY_MODEL = "gpt-4o-mini"                      # model for the periodic bullet-point summaries
+SOAP_MODEL = "gpt-4o"                              # model for the final SOAP note
+SAMPLE_RATE = 16000                                # Hz, mono; rate used when the WAV file is written
+SUMMARY_EVERY_SEC = 60                             # seconds between bullet-point summaries
+# -------------------------------------------------------
+# Estado de sessão (single‑user Space simplificado)
+# -------------------------------------------------------
+class SessionState:
+    """Mutable state of one recording session, shared by every Gradio callback."""
+    def __init__(self):
+        # Capture status and Realtime API connection.
+        self.running = False
+        self.ws = None
+        # Free-text context typed by the user before recording.
+        self.contexto = ""
+        # Outputs accumulated while recording: transcript text, summary
+        # bullets and the raw audio chunks (numpy arrays).
+        self.transcript_full = ""
+        self.bullets = []
+        self.audio_chunks = []
+        # Wall-clock time of the most recent summary (starts at creation).
+        self.last_summary_ts = time.time()
+# Single module-level instance: the app keeps one session for the whole process.
+state = SessionState()
+# -------------------------------------------------------
+# Funções auxiliares – OpenAI Realtime API
+# -------------------------------------------------------
+async def open_realtime_ws() -> websockets.WebSocketClientProtocol:
+    """Open a WebSocket to the Realtime API and wait for ``session.created``.
+
+    Returns:
+        The connected WebSocket, positioned right after the initial event.
+
+    Raises:
+        RuntimeError: if the first event received is not ``session.created``.
+    """
+    uri = f"wss://api.openai.com/v1/realtime?model={STT_MODEL}"
+    # NOTE(review): newer ``websockets`` releases are reported to name this
+    # keyword ``additional_headers`` instead of ``extra_headers``; confirm
+    # against the version that actually gets installed.
+    ws = await websockets.connect(
+        uri,
+        extra_headers={
+            "Authorization": f"Bearer {OPENAI_API_KEY}",
+            "OpenAI-Beta": "realtime=v1",
+        },
+        subprotocols=["realtime"],
+        max_size=1 * 1024 * 1024,  # 1 MB
+    )
+    try:
+        # The first event must be session.created.
+        evt = json.loads(await ws.recv())
+        if evt.get("type") != "session.created":
+            raise RuntimeError(f"Evento inicial inesperado: {evt}")
+    except Exception:
+        # Do not leak the socket when the handshake event is missing or wrong.
+        await ws.close()
+        raise
+    return ws
+async def pcm_from_numpy(chunk: np.ndarray) -> bytes:
+    """Convert an audio array to 16-bit little-endian PCM bytes.
+
+    Floating-point input is treated as samples in [-1, 1] (values outside
+    that range are clipped) and scaled by 32767. Integer input is treated as
+    already being PCM sample values and is only clipped to the int16 range,
+    instead of being clipped to [-1, 1] and flattened to near-silence.
+
+    Kept ``async`` so existing ``await pcm_from_numpy(...)`` callers still work.
+    """
+    samples = np.asarray(chunk)
+    if np.issubdtype(samples.dtype, np.integer):
+        pcm16 = np.clip(samples, -32768, 32767).astype("<i2")
+    else:
+        pcm16 = (np.clip(samples.astype(np.float32), -1, 1) * 32767).astype("<i2")
+    # "<i2" pins little-endian byte order regardless of the host platform.
+    return pcm16.tobytes()
+async def send_audio_chunk(chunk: np.ndarray, ws: websockets.WebSocketClientProtocol):
+    """Send one audio chunk to the Realtime API over ``ws``.
+
+    The chunk is converted to 16-bit PCM and wrapped in a JSON
+    ``input_audio_buffer.append`` client event with base64-encoded audio,
+    rather than being pushed as a raw binary frame. Empty chunks are skipped.
+    """
+    import base64  # local import: this block is the only user of the module
+    pcm = await pcm_from_numpy(chunk)
+    if not pcm:
+        return
+    event = {
+        "type": "input_audio_buffer.append",
+        "audio": base64.b64encode(pcm).decode("ascii"),
+    }
+    await ws.send(json.dumps(event))
+# -------------------------------------------------------
+# Funções de resumo / SOAP (Chat Completions)
+# -------------------------------------------------------
+async def summarize_block(text: str) -> str:
+    """Summarize ``text`` into at most five concise bullet points in Portuguese.
+
+    Returns an empty string without calling the API when ``text`` is blank,
+    so the model is never asked to summarize nothing.
+    """
+    if not text or not text.strip():
+        return ""
+    # The SDK call is synchronous; run it in a worker thread so the event
+    # loop (and with it the audio streaming callback) is not blocked.
+    rsp = await asyncio.to_thread(
+        openai.chat.completions.create,
+        model=SUMMARY_MODEL,
+        messages=[
+            {
+                "role": "system",
+                "content": "Você é escriba clínico. Resuma o texto a seguir em até 5 bullet points concisos, em português.",
+            },
+            {"role": "user", "content": text},
+        ],
+        temperature=0.3,
+    )
+    # ``content`` may be None; fall back to an empty string before stripping.
+    return (rsp.choices[0].message.content or "").strip()
+async def generate_soap(full_txt: str, bullets: list[str], contexto: str) -> str:
+    """Build the final SOAP note from the transcript, the bullets and the context.
+
+    Args:
+        full_txt: complete transcript of the encounter.
+        bullets: summaries produced during the session, used as a guide.
+        contexto: free-text context supplied by the user.
+
+    Returns:
+        The model's SOAP note, stripped of surrounding whitespace.
+    """
+    messages = [
+        {"role": "system", "content": "Você é um escriba médico sênior."},
+        {"role": "user", "content": f"Contexto: {contexto}"},
+        {"role": "assistant", "content": "\n".join(bullets)},
+        {
+            "role": "user",
+            "content": (
+                "Transcrição completa a seguir. Elabore a nota final no formato SOAP, em português, "
+                "utilizando os bullet points como guia.\n\n" + full_txt
+            ),
+        },
+    ]
+    # The SDK call is synchronous; run it in a worker thread so the event
+    # loop is not blocked while the note is generated.
+    rsp = await asyncio.to_thread(
+        openai.chat.completions.create,
+        model=SOAP_MODEL,
+        messages=messages,
+        temperature=0.2,
+    )
+    # ``content`` may be None; fall back to an empty string before stripping.
+    return (rsp.choices[0].message.content or "").strip()
+# -------------------------------------------------------
+# Callbacks Gradio
+# -------------------------------------------------------
+async def cb_start(contexto: str):
+    """Start a recording session: reset the state and open the Realtime WebSocket.
+
+    Args:
+        contexto: optional free-text context typed by the user.
+
+    Returns:
+        A Gradio update carrying the new status text.
+    """
+    if state.running:
+        return gr.update(value="Já gravando...")
+    state.__init__()  # reset every field to its initial value
+    state.contexto = contexto
+    # Claim the session before awaiting so a second click is rejected.
+    state.running = True
+    try:
+        state.ws = await open_realtime_ws()
+    except Exception:
+        # Release the claim; otherwise a failed connection would leave the
+        # app answering "Já gravando..." forever with no socket behind it.
+        state.running = False
+        raise
+    state.last_summary_ts = time.time()
+    return gr.update(value="Gravando… (clique em Finalizar para encerrar)")
+def _prepare_chunk(audio_chunk) -> np.ndarray:
+    """Normalize a microphone payload to mono float32 in [-1, 1] at SAMPLE_RATE."""
+    rate = SAMPLE_RATE
+    # Gradio's numpy audio format is a (sample_rate, data) tuple; a bare
+    # array is still accepted and assumed to already be at SAMPLE_RATE.
+    if isinstance(audio_chunk, tuple):
+        rate, audio_chunk = audio_chunk
+    samples = np.asarray(audio_chunk)
+    if np.issubdtype(samples.dtype, np.integer):
+        # Integer PCM -> float in [-1, 1].
+        samples = samples.astype(np.float32) / float(np.iinfo(samples.dtype).max)
+    else:
+        samples = samples.astype(np.float32)
+    if samples.ndim == 2:
+        samples = samples.mean(axis=1)  # down-mix to mono
+    if rate != SAMPLE_RATE and samples.size:
+        # Plain linear-interpolation resample; adequate for a prototype.
+        n_out = max(1, int(round(samples.size * SAMPLE_RATE / rate)))
+        positions = np.linspace(0, samples.size - 1, n_out)
+        samples = np.interp(positions, np.arange(samples.size), samples).astype(np.float32)
+    return samples
+def _transcript_text(evt: dict) -> str:
+    """Return the transcript text carried by a server event, or "" if none."""
+    kind = evt.get("type")
+    if kind == "transcript":
+        return evt["transcript"]["text"]
+    # NOTE(review): the Realtime API reference names the completed input
+    # transcription event as below, with the text in ``transcript``; confirm
+    # that the session has input transcription enabled.
+    if kind == "conversation.item.input_audio_transcription.completed":
+        return evt.get("transcript") or ""
+    return ""
+async def cb_stream(audio_chunk, live_txt, live_sum):
+    """Streaming callback of the microphone component.
+
+    Stores the chunk locally, forwards it to the Realtime API, drains up to
+    five pending server events, and refreshes the bullet summary every
+    SUMMARY_EVERY_SEC seconds.
+
+    Returns:
+        ``(transcript, summary_markdown)`` for the two live panels. The
+        incoming values are returned untouched when no session is active.
+    """
+    # ``state.ws`` is still None while cb_start is connecting.
+    if not state.running or state.ws is None or audio_chunk is None:
+        return live_txt, live_sum
+    samples = _prepare_chunk(audio_chunk)
+    # Keep the local copy first so the recording survives a failed upload.
+    state.audio_chunks.append(samples)
+    await send_audio_chunk(samples, state.ws)
+    # Poll briefly for new transcripts without stalling the stream.
+    try:
+        for _ in range(5):
+            msg = await asyncio.wait_for(state.ws.recv(), timeout=0.01)
+            txt = _transcript_text(json.loads(msg))
+            if txt:
+                state.transcript_full += txt + " "
+    except (asyncio.TimeoutError, websockets.exceptions.ConnectionClosedOK):
+        pass
+    # Summarize every SUMMARY_EVERY_SEC, but only once there is text.
+    now = time.time()
+    if now - state.last_summary_ts >= SUMMARY_EVERY_SEC and state.transcript_full.strip():
+        bullet = await summarize_block(state.transcript_full[-4000:])
+        state.bullets.append(bullet)
+        state.last_summary_ts = now
+    live_summary_md = "\n\n".join(state.bullets)
+    return state.transcript_full, live_summary_md
+async def cb_stop():
+    """Stop recording, build the SOAP note and expose the recorded audio.
+
+    Returns:
+        ``(transcript, summary_markdown, soap_html)``; three empty strings
+        when no session is running.
+    """
+    import html  # local import: only this block needs HTML escaping
+    if not state.running:
+        return "", "", ""
+    state.running = False
+    if state.ws:
+        await state.ws.close()
+    # Add a closing summary when there is none yet or the last one is stale.
+    if state.transcript_full and (not state.bullets or (time.time() - state.last_summary_ts) > 15):
+        bullet = await summarize_block(state.transcript_full[-4000:])
+        state.bullets.append(bullet)
+    soap = await generate_soap(state.transcript_full, state.bullets, state.contexto)
+    # Save the audio. NamedTemporaryFile creates the file atomically, unlike
+    # the race-prone tempfile.mktemp; the link is emitted only when a file
+    # was actually written.
+    download_link = ""
+    if state.audio_chunks:
+        with tempfile.NamedTemporaryFile(suffix=".wav", prefix="consulta_", delete=False) as tmp:
+            wav_path = tmp.name
+        sf.write(wav_path, np.concatenate(state.audio_chunks), SAMPLE_RATE)
+        # NOTE(review): whether ``file=<path>`` is served depends on the
+        # Gradio version and its allowed paths; confirm on the target Space.
+        download_link = f"<a href='file={html.escape(wav_path, quote=True)}' download>Baixar áudio (.wav)</a>"
+    # The note is model output: JSON-encode it as a JS string literal, then
+    # HTML-escape it for the attribute, so quotes, backticks or markup in
+    # the text cannot break out of the handler.
+    js_arg = html.escape(json.dumps(soap), quote=True)
+    copy_btn = f'<button onclick="navigator.clipboard.writeText({js_arg})">Copiar SOAP</button>'
+    soap_html = f"<h3>Nota SOAP</h3><pre>{html.escape(soap)}</pre>{download_link}<br>{copy_btn}"
+    return state.transcript_full, "\n\n".join(state.bullets), soap_html
+# -------------------------------------------------------
+# Interface Gradio
+# -------------------------------------------------------
+# Layout: context box and start/stop buttons on top, live transcript and
+# summary side by side, then the final SOAP note and the microphone input.
+with gr.Blocks(title="Transcrição Inteligente – Demo") as demo:
+    gr.Markdown("## Transcrição inteligente de consultas médicas em tempo real")
+    with gr.Row():
+        contexto_txt = gr.Textbox(label="Contexto da consulta (opcional)", lines=2, placeholder="Ex.: Paciente com dispneia crônica...")
+        btn_start = gr.Button("Iniciar", variant="primary")
+        btn_stop = gr.Button("Finalizar", variant="stop")
+    with gr.Row():
+        md_transcript = gr.Markdown("", label="Transcrição em tempo real")
+        md_summary = gr.Markdown("", label="Resumo (bullet points)")
+    md_soap = gr.HTML("", label="Nota SOAP final")
+    # Gradio 4+ takes a list under ``sources``; the singular ``source``
+    # keyword from Gradio 3 is no longer accepted.
+    mic = gr.Audio(sources=["microphone"], type="numpy", streaming=True, label="Microfone (16 kHz)")
+    # Event wiring
+    btn_start.click(cb_start, inputs=[contexto_txt], outputs=[btn_start])
+    mic.stream(cb_stream, inputs=[mic, md_transcript, md_summary], outputs=[md_transcript, md_summary])
+    btn_stop.click(cb_stop, inputs=None, outputs=[md_transcript, md_summary, md_soap])
+if __name__ == "__main__":
+    demo.launch()

requirements.txt ADDED Viewed

	@@ -0,0 +1,5 @@

+openai>=1.24.0
+gradio>=4.27.0
+websockets>=12.0
+soundfile>=0.12.1
+numpy>=1.26.0