Spaces:

ejjocko
/

dr-q-bot-multimodal

Sleeping

App Files Files Community

jocko committed on Aug 9

Commit

0a89a37

1 Parent(s): 4a259f2

copy updates of mult modal

Browse files

Files changed (2) hide show

README.md +3 -4
src/streamlit_app.py +103 -140

README.md CHANGED Viewed

@@ -1,5 +1,5 @@
 ---
-title: Dr Q Bot Multimodal
 emoji: 🚀
 colorFrom: red
 colorTo: red
@@ -8,12 +8,11 @@ app_port: 8501
 tags:
 - streamlit
 pinned: false
-short_description: multimodal
 ---
 # Welcome to Streamlit!
 Edit `/src/streamlit_app.py` to customize this app to your heart's desire. :heart:
 If you have any questions, check out our [documentation](https://docs.streamlit.io) and [community
-forums](https://discuss.streamlit.io).

 ---
+title: Dr Q
 emoji: 🚀
 colorFrom: red
 colorTo: red
 tags:
 - streamlit
 pinned: false
+short_description: Multimodal medical chatbot
 ---
 # Welcome to Streamlit!
 Edit `/src/streamlit_app.py` to customize this app to your heart's desire. :heart:
 If you have any questions, check out our [documentation](https://docs.streamlit.io) and [community
+forums](https://discuss.streamlit.io).

src/streamlit_app.py CHANGED Viewed

@@ -1,164 +1,127 @@
 import os
-# ✅ Set all relevant cache directories to a writable location
-os.environ["HF_HOME"] = "/tmp/cache"
-os.environ["TRANSFORMERS_CACHE"] = "/tmp/cache/transformers"
-os.environ["SENTENCE_TRANSFORMERS_HOME"] = "/tmp/cache/sentence_transformers"
-os.environ["HF_DATASETS_CACHE"] = "/tmp/cache/hf_datasets"
-os.environ["TORCH_HOME"] = "/tmp/cache/torch"
-# ✅ Create the directories if they don't exist
-for path in [
-    "/tmp/cache",
-    "/tmp/cache/transformers",
-    "/tmp/cache/sentence_transformers",
-    "/tmp/cache/hf_datasets",
-    "/tmp/cache/torch"
-]:
-    os.makedirs(path, exist_ok=True)
-import json
 import torch
-import openai
-import os
 from sentence_transformers import SentenceTransformer, util
-import streamlit as st
-from pathlib import Path
-# === CONFIG ===
-# Set the API key
-client = openai.OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
-# openai.api_key = os.getenv("OPENAI_API_KEY")
-# REMEDI_PATH = "ReMeDi-base.json"
-BASE_DIR = Path(__file__).parent
-REMEDI_PATH = BASE_DIR / "ReMeDi-base.json"
-# Check if file exists
-if not REMEDI_PATH.exists():
-    raise FileNotFoundError(f"❌ File not found: {REMEDI_PATH}")
-# Load the file
-with open(REMEDI_PATH, "r", encoding="utf-8") as f:
-    data = json.load(f)
-# === LOAD MODEL ===
-@st.cache_resource
-def load_model():
-    return SentenceTransformer("all-MiniLM-L6-v2")
-    # return model
-@st.cache_resource
-def load_data():
-    with open(REMEDI_PATH, "r", encoding="utf-8") as f:
-        data = json.load(f)
-    dialogue_pairs = []
-    for conversation in data:
-        turns = conversation["information"]
-        for i in range(len(turns) - 1):
-            if turns[i]["role"] == "patient" and turns[i + 1]["role"] == "doctor":
-                dialogue_pairs.append({
-                    "patient": turns[i]["sentence"],
-                    "doctor": turns[i + 1]["sentence"]
-                })
-    return dialogue_pairs
-@st.cache_data
-def build_embeddings(dialogue_pairs, _model):
-    patient_sentences = [pair["patient"] for pair in dialogue_pairs]
-    embeddings = _model.encode(patient_sentences, convert_to_tensor=True)
-    return embeddings
-# === TRANSLATE USING GPT ===
-def translate_to_english(chinese_text):
-    prompt = f"Translate the following Chinese medical response to English:\n\n{chinese_text}"
-    try:
-        response = client.chat.completions.create(
-            model="gpt-4",
-            messages=[{"role": "user", "content": prompt}],
-            temperature=0.2
-        )
-        return response.choices[0].message.content
-        # return response.choices[0].message["content"].strip()
-    except Exception as e:
-        return f"Translation failed: {str(e)}"
-def gpt_direct_response(user_input):
-    prompt = f"You are a knowledgeable and compassionate medical assistant. Answer the following patient question clearly and concisely:\n\n{user_input}"
-    try:
-        response = client.chat.completions.create(
-            model="gpt-4",  # or "gpt-3.5-turbo" to save credits
-            messages=[{"role": "user", "content": prompt}],
-            temperature=0.5
-        )
-        return response.choices[0].message.content
-    except Exception as e:
-        return f"GPT response failed: {str(e)}"
-# === CHATBOT FUNCTION ===
-def chatbot_response(user_input, _model, dialogue_pairs, patient_embeddings, top_k=1):
-    user_embedding = _model.encode(user_input, convert_to_tensor=True)
-    similarities = util.cos_sim(user_embedding, patient_embeddings)[0]
-    top_score, top_idx = torch.topk(similarities, k=1)
-    top_score = top_score.item()
-    top_idx = torch.topk(similarities, k=top_k).indices[0].item()
-    match = dialogue_pairs[top_idx]
-    translated = translate_to_english(match["doctor"])
-    return {
-        "matched_question": match["patient"],
-        "original_response": match["doctor"],
-        "translated_response": translated
-        # "similarity_score": top_score
-    }
-# === MAIN APP ===
-st.set_page_config(page_title="Dr_Q_bot", layout="centered")
-st.title("🩺 Dr_Q_bot - Medical Chatbot")
-st.write("Ask about a symptom and get an example doctor response (translated from Chinese).")
-# Load resources
-model = load_model()
-dialogue_pairs = load_data()
-patient_embeddings = build_embeddings(dialogue_pairs, model)
-# Chat UI
-user_input = st.text_input("Describe your symptom:")
-if st.button("Submit") and user_input:
-    with st.spinner("Thinking..."):
-        result = chatbot_response(user_input, model, dialogue_pairs, patient_embeddings)
-        gpt_response = gpt_direct_response(user_input)
-        st.markdown("## ✅ GPT-4 Doctor's Response")
-        st.success(gpt_response)
-        # if torch.max(similarities).item() < 0.4:
-        st.markdown("## 🔁 Example Historical Dialogue")
-        st.markdown("### 🧑‍⚕️ Closest Patient Question")
-        st.write(result["matched_question"])
-        st.markdown("### 🇨🇳 Original Doctor Response (Chinese)")
-        st.write(result["original_response"])
-        st.markdown("### 🌐 Translated Doctor Response (English)")
-        st.success(result["translated_response"])
-        # else:
-        #    st.warning("No close match found in dataset. Using GPT response only.")
-        # st.markdown("### 💬 GPT Doctor Response (AI-generated)")
-        # st.info(gpt_response)
-        # Skip dataset result
-        st.markdown("---")
-        st.warning(
-            "This chatbot uses real dialogue data for research and educational use only. Not a substitute for professional medical advice.")

+# ================================
+#   ✅ Cache-Safe Multimodal App
+# ================================
 import os
+# ====== Force all cache dirs to /tmp (writable in most environments) ======
+CACHE_BASE = "/tmp/cache"
+os.environ["HF_HOME"] = f"{CACHE_BASE}/hf_home"
+os.environ["TRANSFORMERS_CACHE"] = f"{CACHE_BASE}/transformers"
+os.environ["SENTENCE_TRANSFORMERS_HOME"] = f"{CACHE_BASE}/sentence_transformers"
+os.environ["HF_DATASETS_CACHE"] = f"{CACHE_BASE}/hf_datasets"
+os.environ["TORCH_HOME"] = f"{CACHE_BASE}/torch"
+os.environ["STREAMLIT_CACHE_DIR"] = f"{CACHE_BASE}/streamlit_cache"
+os.environ["STREAMLIT_STATIC_DIR"] = f"{CACHE_BASE}/streamlit_static"
+# Create the directories before imports
+for path in os.environ.values():
+    if path.startswith(CACHE_BASE):
+        os.makedirs(path, exist_ok=True)
+# ====== Imports ======
+import streamlit as st
 import torch
 from sentence_transformers import SentenceTransformer, util
+from transformers import CLIPProcessor, CLIPModel
+from datasets import load_dataset, get_dataset_split_names
+from PIL import Image
+import openai
+# ========== 🔑 API Key ==========
+openai.api_key = os.getenv("OPENAI_API_KEY")
+# ========== 📥 Load Models ==========
+@st.cache_resource(show_spinner=False)
+def load_models():
+    """Build the CLIP model, its processor, and the sentence encoder.
+
+    Decorated with ``st.cache_resource`` so the objects are created once and
+    reused on later script reruns instead of being reloaded every time.
+
+    Returns:
+        A ``(clip_model, clip_processor, text_model)`` tuple.
+    """
+    clip_checkpoint = "openai/clip-vit-base-patch32"
+    hf_cache_dir = os.environ["TRANSFORMERS_CACHE"]
+    # Model and processor come from the same checkpoint and cache directory.
+    vision_language_model = CLIPModel.from_pretrained(
+        clip_checkpoint, cache_dir=hf_cache_dir
+    )
+    vision_language_processor = CLIPProcessor.from_pretrained(
+        clip_checkpoint, cache_dir=hf_cache_dir
+    )
+    sentence_encoder = SentenceTransformer(
+        "all-MiniLM-L6-v2",
+        cache_folder=os.environ["SENTENCE_TRANSFORMERS_HOME"],
+    )
+    return vision_language_model, vision_language_processor, sentence_encoder
+clip_model, clip_processor, text_model = load_models()
+# ========== 📥 Load Dataset ==========
+@st.cache_resource(show_spinner=False)
+def load_medical_data():
+    """Load one split of the ``univanxx/3mdbench`` dataset.
+
+    The ``train`` split is used when the dataset reports one; otherwise the
+    first split name returned by ``get_dataset_split_names`` is used.
+
+    Returns:
+        Whatever ``load_dataset`` returns for the chosen split.
+    """
+    repo_id = "univanxx/3mdbench"
+    split_names = get_dataset_split_names(repo_id)
+    if "train" in split_names:
+        chosen_split = "train"
+    else:
+        chosen_split = split_names[0]
+    return load_dataset(
+        repo_id,
+        split=chosen_split,
+        cache_dir=os.environ["HF_DATASETS_CACHE"],
+    )
+data = load_medical_data()
+# Temporary debug display
+#st.write("Dataset columns:", data.features.keys())
+# After seeing the real column name, let's say it's "text" instead of "description":
+text_field = "text" if "text" in data.features else list(data.features.keys())[0]
+# Then use dynamic access:
+#text_embeddings = embed_texts(data[text_field])
+# ========== 🧠 Embedding Function ==========
+@st.cache_data(show_spinner=False)
+def _encode_texts_cached(texts):
+    """Encode ``texts`` with the sentence encoder, cached per distinct input.
+
+    ``texts`` has no leading underscore on purpose: Streamlit hashes it to
+    build the cache key, so different inputs get different cache entries.
+
+    Args:
+        texts: A single string, or a tuple of strings.
+
+    Returns:
+        The tensor produced by ``text_model.encode(..., convert_to_tensor=True)``.
+    """
+    batch = texts if isinstance(texts, str) else list(texts)
+    return text_model.encode(batch, convert_to_tensor=True)
+def embed_texts(_texts):
+    """Return sentence embeddings for ``_texts`` as a tensor.
+
+    Streamlit leaves underscore-prefixed parameters out of the cache key, so
+    caching this function directly would make every call share one cache
+    entry (a query would get back the dataset's embeddings). The work is
+    therefore delegated to a cached helper whose argument *is* hashed.
+
+    Args:
+        _texts: A single string or an iterable of strings to encode.
+
+    Returns:
+        The embeddings tensor for the given input.
+    """
+    # Strings pass through unchanged; any other iterable is frozen into a
+    # tuple so it can be hashed whatever container type the caller used.
+    cache_key = _texts if isinstance(_texts, str) else tuple(_texts)
+    return _encode_texts_cached(cache_key)
+# Pick which text column to use
+TEXT_COLUMN = "complaints"  # or "general_complaint", depending on your needs
+# ========== 🧑‍⚕️ App UI ==========
+st.title("🩺 Multimodal Medical Chatbot")
+query = st.text_input("Enter your medical question or symptom description:")
+if query:
+    with st.spinner("Searching medical cases..."):
+        # Dataset embeddings are expensive, so they go through the cached helper.
+        text_embeddings = embed_texts(data[TEXT_COLUMN])
+        # The query is a one-off string: encode it directly so the result can
+        # never be served from (or confused with) a cached dataset entry.
+        query_embedding = text_model.encode(query, convert_to_tensor=True)
+        # Cosine similarity of the query against every case; keep the best one.
+        cos_scores = util.pytorch_cos_sim(query_embedding, text_embeddings)[0]
+        top_result = torch.topk(cos_scores, k=1)
+        idx = top_result.indices[0].item()
+        selected = data[idx]
+        # Show Image
+        st.image(selected['image'], caption="Most relevant medical image", use_container_width=True)
+        # Show Text
+        st.markdown(f"**Case Description:** {selected[TEXT_COLUMN]}")
+        # GPT Explanation (optional: only when an API key is configured)
+        if openai.api_key:
+            prompt = f"Explain this case in plain English: {selected[TEXT_COLUMN]}"
+            client = openai.OpenAI(api_key=openai.api_key)
+            try:
+                response = client.chat.completions.create(
+                    model="gpt-4o",  # or "gpt-4" if you need the older GPT-4
+                    messages=[{"role": "user", "content": prompt}],
+                    temperature=0.5,
+                    max_tokens=150,
+                )
+            except openai.OpenAIError as exc:
+                # The matched case is already on screen; report the API
+                # failure instead of crashing the whole page.
+                st.error(f"GPT explanation failed: {exc}")
+            else:
+                explanation = response.choices[0].message.content
+                st.markdown(f"### 🤖 Explanation by GPT:\n{explanation}")
+        else:
+            st.warning("OpenAI API key not found. Please set OPENAI_API_KEY as a secret environment variable.")
+st.caption("This chatbot is for educational purposes only and does not provide medical advice.")