Spaces:
Runtime error
Runtime error
Commit
·
304fad8
1
Parent(s):
1be4c03
Initial commit for Hugging Face Space
Browse files
- app.py +131 -27
- build_faiss_index.py +0 -39
- dataset_loader.py +16 -9
app.py
CHANGED
@@ -1,49 +1,78 @@
|
|
1 |
import streamlit as st
|
2 |
-
from time import sleep
|
3 |
from retriever import load_vector_store
|
4 |
from langgraph_graph import generate_answer
|
|
|
|
|
|
|
|
|
5 |
|
6 |
st.set_page_config("MedMCQA Chatbot", page_icon="🩺")
|
7 |
|
8 |
-
# π
|
9 |
with st.sidebar:
|
10 |
st.title("🩺 MedMCQA Chatbot")
|
11 |
theme_mode = st.radio("π Theme", ["Light", "Dark"], horizontal=True)
|
12 |
|
13 |
-
# π
|
14 |
if theme_mode == "Dark":
|
15 |
st.markdown("""
|
16 |
<style>
|
17 |
-
|
18 |
-
.
|
19 |
-
|
20 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
21 |
</style>
|
22 |
""", unsafe_allow_html=True)
|
23 |
else:
|
24 |
st.markdown("""
|
25 |
<style>
|
26 |
-
|
27 |
-
.
|
28 |
-
|
29 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
30 |
</style>
|
31 |
""", unsafe_allow_html=True)
|
32 |
|
|
|
33 |
st.header("🩺 MedMCQA Chatbot")
|
34 |
st.caption("Ask a medical question and get answers from the MedMCQA dataset only. If not found, it will respond gracefully.")
|
35 |
|
36 |
-
#
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
db = get_vector_store()
|
43 |
-
|
44 |
-
# βοΈ Query
|
45 |
-
query = st.text_input("π Enter your medical question:",
|
46 |
-
placeholder="e.g., What is the mechanism of Aspirin?")
|
47 |
|
48 |
# π Answer generation
|
49 |
if query:
|
@@ -53,18 +82,32 @@ if query:
|
|
53 |
with st.spinner("🧠 Generating answer..."):
|
54 |
response = generate_answer(query, context)
|
55 |
|
56 |
-
|
57 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
58 |
answer_placeholder = st.empty()
|
59 |
final_text = ""
|
60 |
for char in response:
|
61 |
final_text += char
|
62 |
-
answer_placeholder.markdown(final_text)
|
63 |
sleep(0.01)
|
64 |
|
65 |
with st.expander("π Top Matches"):
|
66 |
for i, doc in enumerate(results, 1):
|
67 |
-
|
|
|
|
|
|
|
68 |
|
69 |
# π¬ Sidebar Contact
|
70 |
with st.sidebar:
|
@@ -72,4 +115,65 @@ with st.sidebar:
|
|
72 |
st.markdown("### π¬ Contact")
|
73 |
st.markdown("[π§ Email](mailto:[email protected])")
|
74 |
st.markdown("[π LinkedIn](https://linkedin.com/in/sankethhonavar)")
|
75 |
-
st.markdown("[π» GitHub](https://github.com/
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import streamlit as st
|
|
|
2 |
from retriever import load_vector_store
|
3 |
from langgraph_graph import generate_answer
|
4 |
+
from time import sleep
|
5 |
+
|
6 |
+
# Load vector DB
|
7 |
+
db = load_vector_store()
|
8 |
|
9 |
st.set_page_config("MedMCQA Chatbot", page_icon="🩺")
|
10 |
|
11 |
+
# π Theme toggle sidebar
|
12 |
with st.sidebar:
|
13 |
st.title("🩺 MedMCQA Chatbot")
|
14 |
theme_mode = st.radio("π Theme", ["Light", "Dark"], horizontal=True)
|
15 |
|
16 |
+
# π Apply selected theme
|
17 |
if theme_mode == "Dark":
|
18 |
st.markdown("""
|
19 |
<style>
|
20 |
+
:root { --text-color: #eee; }
|
21 |
+
body, .stApp {
|
22 |
+
background-color: #1e1e1e !important;
|
23 |
+
color: var(--text-color) !important;
|
24 |
+
}
|
25 |
+
.stTextInput input {
|
26 |
+
background-color: #333 !important;
|
27 |
+
color: var(--text-color) !important;
|
28 |
+
}
|
29 |
+
.stTextInput label {
|
30 |
+
color: var(--text-color) !important;
|
31 |
+
}
|
32 |
+
input::placeholder {
|
33 |
+
color: #bbb !important;
|
34 |
+
}
|
35 |
+
.stButton>button {
|
36 |
+
background-color: #444 !important;
|
37 |
+
color: var(--text-color) !important;
|
38 |
+
}
|
39 |
</style>
|
40 |
""", unsafe_allow_html=True)
|
41 |
else:
|
42 |
st.markdown("""
|
43 |
<style>
|
44 |
+
:root { --text-color: #111; }
|
45 |
+
body, .stApp {
|
46 |
+
background-color: #ffffff !important;
|
47 |
+
color: var(--text-color) !important;
|
48 |
+
}
|
49 |
+
.stTextInput input {
|
50 |
+
background-color: #f0f0f0 !important;
|
51 |
+
color: var(--text-color) !important;
|
52 |
+
}
|
53 |
+
.stTextInput label {
|
54 |
+
color: var(--text-color) !important;
|
55 |
+
}
|
56 |
+
input::placeholder {
|
57 |
+
color: #444 !important;
|
58 |
+
}
|
59 |
+
.stButton>button {
|
60 |
+
background-color: #e0e0e0 !important;
|
61 |
+
color: var(--text-color) !important;
|
62 |
+
}
|
63 |
</style>
|
64 |
""", unsafe_allow_html=True)
|
65 |
|
66 |
+
# π§ App title
|
67 |
st.header("🩺 MedMCQA Chatbot")
|
68 |
st.caption("Ask a medical question and get answers from the MedMCQA dataset only. If not found, it will respond gracefully.")
|
69 |
|
70 |
+
# βοΈ Query box
|
71 |
+
query = st.text_input(
|
72 |
+
"π Enter your medical question:",
|
73 |
+
placeholder="e.g., What is the mechanism of Aspirin?",
|
74 |
+
label_visibility="visible"
|
75 |
+
)
|
|
|
|
|
|
|
|
|
|
|
76 |
|
77 |
# π Answer generation
|
78 |
if query:
|
|
|
82 |
with st.spinner("🧠 Generating answer..."):
|
83 |
response = generate_answer(query, context)
|
84 |
|
85 |
+
st.markdown("""
|
86 |
+
<style>
|
87 |
+
.fade-in {
|
88 |
+
animation: fadeIn 0.7s ease-in;
|
89 |
+
}
|
90 |
+
@keyframes fadeIn {
|
91 |
+
0% { opacity: 0; transform: translateY(20px); }
|
92 |
+
100% { opacity: 1; transform: translateY(0); }
|
93 |
+
}
|
94 |
+
</style>
|
95 |
+
""", unsafe_allow_html=True)
|
96 |
+
|
97 |
+
st.markdown("<div class='fade-in'><h4>🧠 Answer:</h4></div>", unsafe_allow_html=True)
|
98 |
answer_placeholder = st.empty()
|
99 |
final_text = ""
|
100 |
for char in response:
|
101 |
final_text += char
|
102 |
+
answer_placeholder.markdown(f"<div class='fade-in'>{final_text}</div>", unsafe_allow_html=True)
|
103 |
sleep(0.01)
|
104 |
|
105 |
with st.expander("π Top Matches"):
|
106 |
for i, doc in enumerate(results, 1):
|
107 |
+
content = doc.page_content
|
108 |
+
if query.lower() in content.lower():
|
109 |
+
content = content.replace(query, f"**{query}**")
|
110 |
+
st.markdown(f"**Result {i}:**\n\n{content}")
|
111 |
|
112 |
# π¬ Sidebar Contact
|
113 |
with st.sidebar:
|
|
|
115 |
st.markdown("### π¬ Contact")
|
116 |
st.markdown("[📧 Email](mailto:[email protected])")
|
117 |
st.markdown("[π LinkedIn](https://linkedin.com/in/sankethhonavar)")
|
118 |
+
st.markdown("[💻 GitHub](https://github.com/sankethhonavar)")
|
119 |
+
|
120 |
+
# ✨ Floating Icons (Right side - Top aligned)
|
121 |
+
st.markdown("""
|
122 |
+
<style>
|
123 |
+
.floating-button {
|
124 |
+
position: fixed;
|
125 |
+
top: 80px;
|
126 |
+
right: 20px;
|
127 |
+
display: flex;
|
128 |
+
flex-direction: column;
|
129 |
+
gap: 12px;
|
130 |
+
z-index: 9999;
|
131 |
+
}
|
132 |
+
.floating-button a {
|
133 |
+
background-color: #0077b5;
|
134 |
+
color: white;
|
135 |
+
padding: 10px 14px;
|
136 |
+
border-radius: 50%;
|
137 |
+
text-align: center;
|
138 |
+
font-size: 20px;
|
139 |
+
text-decoration: none;
|
140 |
+
box-shadow: 2px 2px 8px rgba(0, 0, 0, 0.3);
|
141 |
+
transition: background-color 0.3s;
|
142 |
+
}
|
143 |
+
.floating-button a:hover {
|
144 |
+
background-color: #005983;
|
145 |
+
}
|
146 |
+
.floating-button a.email {
|
147 |
+
background-color: #444444;
|
148 |
+
}
|
149 |
+
.floating-button a.email:hover {
|
150 |
+
background-color: #222222;
|
151 |
+
}
|
152 |
+
.floating-button a.github {
|
153 |
+
background-color: #171515;
|
154 |
+
}
|
155 |
+
.floating-button a.github:hover {
|
156 |
+
background-color: #000000;
|
157 |
+
}
|
158 |
+
</style>
|
159 |
+
|
160 |
+
<div class="floating-button">
|
161 |
+
<a href="mailto:[email protected]" class="email" title="Email Me">
|
162 |
+
<img src="https://img.icons8.com/ios-filled/25/ffffff/new-post.png" alt="Email"/>
|
163 |
+
</a>
|
164 |
+
<a href="https://linkedin.com/in/sankethhonavar" target="_blank" title="LinkedIn">
|
165 |
+
<img src="https://img.icons8.com/ios-filled/25/ffffff/linkedin.png" alt="LinkedIn"/>
|
166 |
+
</a>
|
167 |
+
<a href="https://github.com/SankethHonavar" target="_blank" class="github" title="GitHub">
|
168 |
+
<img src="https://img.icons8.com/ios-filled/25/ffffff/github.png" alt="GitHub"/>
|
169 |
+
</a>
|
170 |
+
</div>
|
171 |
+
""", unsafe_allow_html=True)
|
172 |
+
|
173 |
+
# π Footer
|
174 |
+
st.markdown("""
|
175 |
+
---
|
176 |
+
<p style='text-align: center; font-size: 0.9rem; color: grey'>
|
177 |
+
Made with ❤️ by <a href='https://linkedin.com/in/sankethhonavar' target='_blank'>Sanketh Honavar</a>
|
178 |
+
</p>
|
179 |
+
""", unsafe_allow_html=True)
|
build_faiss_index.py
DELETED
@@ -1,39 +0,0 @@
|
|
1 |
-
# build_faiss_index.py
|
2 |
-
import faiss
|
3 |
-
import pickle
|
4 |
-
import numpy as np
|
5 |
-
from datasets import load_dataset
|
6 |
-
from langchain.embeddings import HuggingFaceEmbeddings # Or your embedding class
|
7 |
-
|
8 |
-
def format_entry(entry):
|
9 |
-
return {
|
10 |
-
"question": entry["question"],
|
11 |
-
"formatted": (
|
12 |
-
f"Q: {entry['question']}\n"
|
13 |
-
f"A. {entry['opa']} B. {entry['opb']} C. {entry['opc']} D. {entry['opd']}\n"
|
14 |
-
f"Correct Answer: {entry['cop']}\n"
|
15 |
-
f"Explanation: {entry['exp']}"
|
16 |
-
)
|
17 |
-
}
|
18 |
-
|
19 |
-
print("Loading MedMCQA dataset (5000 rows)...")
|
20 |
-
dataset = load_dataset("medmcqa", split="train[:5000]")
|
21 |
-
formatted_data = [format_entry(entry) for entry in dataset]
|
22 |
-
|
23 |
-
# Extract questions for embeddings
|
24 |
-
questions = [entry["formatted"] for entry in formatted_data]
|
25 |
-
|
26 |
-
print("Generating embeddings...")
|
27 |
-
embeddings_model = HuggingFaceEmbeddings() # Or your specific embeddings
|
28 |
-
vectors = np.array([embeddings_model.embed_query(q) for q in questions], dtype="float32")
|
29 |
-
|
30 |
-
print("Building FAISS index...")
|
31 |
-
index = faiss.IndexFlatL2(vectors.shape[1])
|
32 |
-
index.add(vectors)
|
33 |
-
|
34 |
-
# Save FAISS index and data
|
35 |
-
faiss.write_index(index, "data/medmcqa_index/index.faiss")
|
36 |
-
with open("data/medmcqa_index/index.pkl", "wb") as f:
|
37 |
-
pickle.dump(formatted_data, f)
|
38 |
-
|
39 |
-
print("FAISS index saved successfully!")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
dataset_loader.py
CHANGED
@@ -1,11 +1,18 @@
|
|
1 |
# dataset_loader.py
|
2 |
-
import
|
3 |
-
import faiss
|
4 |
|
5 |
-
def
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
# dataset_loader.py
|
2 |
+
from datasets import load_dataset
|
|
|
3 |
|
4 |
+
def load_medmcqa_subset(limit=5000):
|
5 |
+
dataset = load_dataset("medmcqa", split=f"train[:{limit}]")
|
6 |
+
|
7 |
+
def format_entry(entry):
|
8 |
+
return {
|
9 |
+
"question": entry["question"],
|
10 |
+
"formatted": (
|
11 |
+
f"Q: {entry['question']}\n"
|
12 |
+
f"A. {entry['opa']} B. {entry['opb']} C. {entry['opc']} D. {entry['opd']}\n"
|
13 |
+
f"Correct Answer: {entry['cop']}\n"
|
14 |
+
f"Explanation: {entry['exp']}"
|
15 |
+
)
|
16 |
+
}
|
17 |
+
|
18 |
+
return [format_entry(entry) for entry in dataset]
|