msi committed on
Commit
3876d7b
·
1 Parent(s): 016fab6

Fix: moved large

Browse files
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ app/data/faiss_db/index.faiss filter=lfs diff=lfs merge=lfs -text
37
+ app/data/faiss_db/index.pkl filter=lfs diff=lfs merge=lfs -text
app.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI
2
+ from fastapi.middleware.cors import CORSMiddleware
3
+ from contextlib import asynccontextmanager
4
+
5
+ from app.api import analysis, reports, chatbot, infos
6
+ from langchain_community.chat_models import ChatOpenAI
7
+ from langchain_community.embeddings import HuggingFaceEmbeddings
8
+ from langchain_community.vectorstores import FAISS
9
+ from app.utils.rag import get_chain_disease, get_chain_infos, get_chain_chat
10
+ import os, asyncio
11
+
12
+
13
# API keys come from the environment; either may be None when unset —
# the downstream clients then fail at request time, not at import time.
HUGGINGFACE_API_KEY = os.getenv("HUGGINGFACE_API_KEY")
MISTRAL_API_KEY = os.getenv("MISTRAL_API_KEY")


@asynccontextmanager
async def lifespan(app: FastAPI):
    """FastAPI lifespan hook: build the embedding model, FAISS store and
    LLM chains once at startup and publish them on ``app.state``.

    Heavy blocking constructors run via ``asyncio.to_thread`` so the
    event loop is not blocked during startup.
    """
    embedding_model = await asyncio.to_thread(
        HuggingFaceEmbeddings,
        model_name="sentence-transformers/all-MiniLM-L6-v2",
        model_kwargs={"use_auth_token": HUGGINGFACE_API_KEY}
    )

    # Loading a pickled FAISS index requires an explicit opt-in; the index
    # ships with the app, so it is treated as trusted input.
    vectorstore = await asyncio.to_thread(
        FAISS.load_local,
        r"app/data/faiss_db",
        embeddings=embedding_model,
        allow_dangerous_deserialization=True
    )

    # Mistral exposes an OpenAI-compatible endpoint, so ChatOpenAI is
    # pointed at the Mistral base URL instead of api.openai.com.
    llm = ChatOpenAI(
        base_url="https://api.mistral.ai/v1",
        api_key=MISTRAL_API_KEY,
        model_name="mistral-medium"
    )

    # Shared chains consumed by the routers via request.app.state.
    app.state.diagnosis_chain = get_chain_disease(llm, vectorstore)
    app.state.info_chain = get_chain_infos(llm, vectorstore)
    app.state.chat_chain = get_chain_chat(llm=llm, vectorstore=vectorstore)

    yield
43
+
44
app = FastAPI(
    title="Symptom Checker API",
    description="API for symptom checking and analysis",
    version="0.1.0",
    lifespan=lifespan
)

# CORS configuration
# NOTE(review): browsers reject allow_origins=["*"] combined with
# allow_credentials=True per the CORS spec — confirm whether credentials
# are needed or the origin list should be restricted.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Include routers
app.include_router(analysis.router, prefix="/api/analysis", tags=["analysis"])
app.include_router(infos.router, prefix="/api/infos", tags=["infos"])
app.include_router(reports.router, prefix="/api/reports", tags=["report"])
app.include_router(chatbot.router, prefix="/api/chatbot", tags=["chat"])

@app.get("/")
async def root():
    """Liveness probe: confirms the API process is up."""
    return {"message": "Symptom Checker API is running"}
app/api/analysis.py ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import APIRouter, HTTPException,Request
2
+ import json
3
+
4
+ from app.utils.models import AnalysisResult, SymptomAnalysisRequest
5
+ from typing import Union
6
+
7
+ import re
8
+
9
+ router = APIRouter()
10
+
11
+
12
+
13
def process_rag_response(rag_response: Union[str, None]) -> AnalysisResult:
    """Parse the raw LLM output into an :class:`AnalysisResult`.

    The model is prompted to emit a JSON array of
    ``{"disease": ..., "probability": ...}`` objects; any surrounding
    chatter is stripped before parsing, and percentages are scaled to
    the 0.0-1.0 range.

    Raises:
        ValueError: if the response is empty, contains no JSON array,
            is malformed JSON, or does not parse to a non-empty list.
    """
    if rag_response is None or not rag_response.strip():
        raise ValueError("Empty response from RAG model")

    # Locate the JSON array even when the model wraps it in extra text.
    found = re.search(r'\[\s*\{.*?\}\s*\]', rag_response, re.DOTALL)
    if found is None:
        raise ValueError("No valid JSON array found in response")

    try:
        parsed = json.loads(found.group(0))
    except json.JSONDecodeError as e:
        raise ValueError(f"Invalid JSON format: {e}")

    if not (isinstance(parsed, list) and parsed):
        raise ValueError("Parsed JSON is not a valid non-empty list")

    # Map disease name -> confidence in [0, 1]; the model emits percentages.
    diagnoses = {}
    for entry in parsed:
        diagnoses[entry.get("disease", "Unknown")] = entry.get("probability", 0) / 100

    return AnalysisResult(diagnoses=diagnoses)
37
+
38
+
39
+
40
def analyze_symptoms(request: Request, symptoms: str) -> AnalysisResult:
    """Run the diagnosis RAG chain on a symptom string.

    Any failure — chain error or unparsable model output — is surfaced
    to the client as an HTTP 500 with the underlying message.
    """
    chain = request.app.state.diagnosis_chain
    try:
        raw_output = chain.run(symptoms)
        parsed = process_rag_response(raw_output)
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Symptom analysis failed: {str(e)}")
    return parsed
46
+
47
+
48
+ @router.post("/analyze", response_model=AnalysisResult)
49
+ async def analyze_selected_symptoms(request: Request, body: SymptomAnalysisRequest):
50
+ if not body.symptoms:
51
+ raise HTTPException(status_code=400, detail="At least one symptom is required")
52
+ return analyze_symptoms(request, body.symptoms)
53
+
54
+
app/api/chatbot.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import Request, APIRouter
2
+ from app.utils.models import ChatRequest
3
+
4
+
5
# Router for the conversational endpoint; mounted under /api/chatbot in app.py.
router = APIRouter()

# Store user queries.
# NOTE(review): this module-level list is shared by every client of the
# process and is never trimmed — it mixes questions from different users
# and grows without bound. Confirm whether per-session history is intended.
chat_history_list = []



# Endpoint for chatting
@router.post("/chat")
async def chat(request_chat: Request, request: ChatRequest):
    """Answer a medical question via the conversational RAG chain.

    Returns the question, the model's answer, and every question asked
    so far in this process (see note on ``chat_history_list``).
    """
    user_question = request.question
    chat_history_list.append(user_question)  # Track all queries
    response = request_chat.app.state.chat_chain.run(user_question)

    return {
        "question": user_question,
        "answer": response,
        "history": chat_history_list
    }
app/api/infos.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ from app.utils.pdf_process import generate_pdf_report
3
+ from app.utils.infos_process import extract_dict_from_text
4
+ from app.utils.models import DiseaseInfo, DiseaseInput
5
+
6
+ from fastapi import APIRouter, HTTPException, Request
7
+ router = APIRouter()
8
+
9
def get_disease_info(request: Request, disease_name: str) -> DiseaseInfo:
    """Query the info RAG chain for a disease and parse its dict-shaped reply."""
    raw_reply = request.app.state.info_chain.run(disease_name)
    parsed_infos = extract_dict_from_text(raw_reply)
    return DiseaseInfo(infos=parsed_infos)
13
+
14
+
15
+
16
+
17
+ @router.post("/infos", response_model=DiseaseInfo)
18
+ async def give_full_infos(request: Request,requests: DiseaseInput):
19
+ if not requests.disease_name:
20
+ raise HTTPException(
21
+ status_code=400,
22
+ detail="At least one symptom is required"
23
+ )
24
+ disease_info=get_disease_info(request,requests.disease_name)
25
+ filename = f"app/report/report.pdf"
26
+ generate_pdf_report(disease_info.infos, filename)
27
+ return disease_info
app/api/reports.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import APIRouter, HTTPException
2
+ from fastapi.responses import FileResponse
3
+ import os
4
+
5
+ router = APIRouter()
6
+
7
+ @router.get("/report")
8
+ async def get_report():
9
+ report_path = f"app/report/report.pdf"
10
+
11
+ if not os.path.exists(report_path):
12
+ raise HTTPException(
13
+ status_code=404,
14
+ detail="Report not found"
15
+ )
16
+
17
+ return FileResponse(
18
+ report_path,
19
+ media_type="application/pdf",
20
+ filename=f"Report.pdf"
21
+ )
app/data/faiss_db/index.faiss ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f496ab9a89996bd7ed435529a7793d1844eccd8a11c4ac907bdebdeb2ade9868
3
+ size 23780397
app/data/faiss_db/index.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b227f23738538e47fbda442e92c93b8736000cf42ec8e4f61af53ac2761e6b3b
3
+ size 13678957
app/report/report.pdf ADDED
The diff for this file is too large to render. See raw diff
 
app/utils/infos_process.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import re
3
+
4
def fix_multiline_strings(s):
    """Join physical lines that fall inside an unterminated double-quoted
    string, so the result can be fed to ``json.loads``.

    A line containing an odd number of ``"`` characters toggles the
    "inside a string" state; continuation lines are appended to the open
    line with a single space. Escaped quotes get no special handling.
    """
    merged = []
    pending = ""
    open_string = False

    for raw in s.split('\n'):
        stripped = raw.rstrip()
        odd_quotes = stripped.count('"') % 2 == 1
        if open_string:
            # Continue the unterminated string from the previous line.
            pending = pending + ' ' + stripped
            if odd_quotes:
                merged.append(pending)
                open_string = False
        else:
            pending = stripped
            if odd_quotes:
                open_string = True
            else:
                merged.append(pending)

    if open_string:
        merged.append(pending)  # flush a string that never closed
    return "\n".join(merged)
28
+
29
def extract_dict_from_text(text):
    """Pull a Python/JSON dict out of free-form LLM output.

    Tries a fenced code block first, then the widest brace-delimited
    span; repairs strings broken across lines; finally parses with
    ``json.loads``. Returns the dict, or None when nothing parses.
    """
    found = re.search(r"```(?:python)?\s*({.*?})\s*```", text, re.DOTALL)
    if found is None:
        # No fenced block — fall back to any brace-delimited region.
        found = re.search(r"({.*})", text, re.DOTALL)
    if found is None:
        return None

    candidate = found.group(1)

    # Step 1: rejoin string values that were split across physical lines.
    candidate = fix_multiline_strings(candidate)

    # Step 2: attempt a strict JSON parse.
    try:
        return json.loads(candidate)
    except json.JSONDecodeError as je:
        print("JSON decode error:", je)
        return None
app/utils/map_details.py ADDED
@@ -0,0 +1,172 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ from bs4 import BeautifulSoup
3
+ from selenium import webdriver
4
+ from selenium.webdriver.chrome.options import Options
5
+ from selenium.webdriver.common.by import By
6
+ from selenium.webdriver.support.ui import WebDriverWait
7
+ from selenium.webdriver.support import expected_conditions as EC
8
+ import geocoder
9
+ import folium
10
+ from folium import IFrame
11
+ from geopy.distance import geodesic
12
+
13
+
14
def get_doctor_links(city, speciality):
    """Scrape med.tn search results and return profile URLs for doctors
    of *speciality* in *city*.

    Runs a headless Chrome session (the result cards are rendered
    client-side, so plain HTTP fetching is not enough).
    """
    # Setup Chrome options
    options = Options()
    options.headless = True  # Run in headless mode
    options.add_argument('--headless')  # also passed as a flag; harmless duplicate
    options.add_argument('--no-sandbox')
    options.add_argument('--disable-dev-shm-usage')
    # Initialize WebDriver (make sure chromedriver is in your PATH or specify path)
    driver = webdriver.Chrome(options=options)
    links = []
    try:
        url = f"https://www.med.tn/doctor/{speciality}/{city}"
        driver.get(url)

        # Wait up to 10s for the doctor cards to load.
        # NOTE(review): the class name includes a trailing space — confirm
        # it still matches the site's markup and is not a typo.
        WebDriverWait(driver, 10).until(
            EC.presence_of_all_elements_located((By.CLASS_NAME, "card-doctor-block "))
        )

        # Extract doctor cards
        doctor_cards = driver.find_elements(By.CLASS_NAME, "card-doctor-block ")

        for i, card in enumerate(doctor_cards, 1):
            # First anchor inside each card links to the doctor's profile page.
            links.append(card.find_element(By.TAG_NAME, 'a').get_attribute('href'))

    finally:
        # Always release the browser, even when the wait times out.
        driver.quit()
    return links
42
+
43
+
44
+
45
def extract_doctor_profile(url):
    """Fetch a med.tn doctor profile page and extract its details.

    Returns:
        dict with keys: 'img', 'name', 'speciality', 'city', 'address',
        'phone_numbers' (list of str, ['N/A'] when none found) and
        'map_position' ("lat,lon" string, possibly empty).

    Raises:
        requests.HTTPError: on a non-2xx response.
        Exception: when the profile section is missing from the page.
    """
    # Headers to mimic a browser
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }

    # Send request
    response = requests.get(url, headers=headers)
    response.raise_for_status()

    # Parse HTML
    soup = BeautifulSoup(response.text, 'html.parser')

    # Extract profile label section (name / speciality / city live here)
    profile = soup.find('div', class_='profile__label')
    if not profile:
        raise Exception("Profile section not found")

    # Extract image; strip the CDN thumbnail prefix to recover the original URL
    img_tag = soup.find('div', class_='profile__photo')
    img = ""
    if img_tag and img_tag.find('img'):
        img_src = img_tag.find('img')['src']
        img = img_src.replace(
            'https://imagecdn.med.ovh/unsafe/195x195/filters:format():quality(10):blur(30)/', '')

    # Extract name, speciality, city
    name = profile.find('h1').text.strip() if profile.find('h1') else ''
    speciality = profile.find('div', class_='profile__label--spe')
    speciality = speciality.text.strip() if speciality else ''
    city = profile.find('div', class_='profile__label--adr')
    city = city.text.strip() if city else ''

    # Extract address
    address_tag = soup.find('span', class_='profile__adr')
    address = address_tag.text.strip() if address_tag else ''

    # Extract phone numbers; the last anchor can be a "Book ..." button
    # rather than a number, so drop it when present.
    phone_numbers = []
    num_tels = soup.find('div', class_='displaynum')
    if num_tels:
        phone_numbers = [a.get_text(strip=True) for a in num_tels.find_all('a')]
        # Fix: guard against an empty anchor list before peeking at [-1]
        # (previously raised IndexError when 'displaynum' had no <a> tags).
        if phone_numbers and phone_numbers[-1].startswith('Book'):
            phone_numbers.pop()
    if not phone_numbers:
        phone_numbers = ['N/A']

    # Extract map position from the itinerary link's Google Maps URL
    map_tag = soup.find('a', class_='btn-itineraire') or soup.find('a', target='_dir')
    map_position = ""
    if map_tag and 'href' in map_tag.attrs:
        map_position = map_tag['href'].replace('?api=1&destination=', '')

    # Final output
    full_details = {
        'img': img,
        'name': name,
        'speciality': speciality,
        'city': city,
        'address': address,
        'phone_numbers': phone_numbers,
        'map_position': map_position
    }
    return full_details
108
+
109
def get_my_location():
    """Geolocate this machine by IP address.

    Returns:
        (latitude, longitude, city, country); city falls back to
        "Your Location" and country to "" when unavailable.

    Raises:
        RuntimeError: when the IP lookup fails.
    """
    lookup = geocoder.ip('me')
    if not (lookup.ok and lookup.latlng):
        raise RuntimeError("Unable to detect your location.")

    latitude, longitude = lookup.latlng
    return (
        latitude,
        longitude,
        lookup.city or "Your Location",
        lookup.country or "",
    )
118
+
119
def create_the_map(speciality):
    """Render "map.html": the user's IP-derived location plus markers for
    nearby (< 4 km) doctors of *speciality* scraped from med.tn.

    Side effect: writes "map.html" in the current working directory.
    """

    # Get user location
    user_lat, user_lon, city, country = get_my_location()
    # NOTE: `map` shadows the builtin of the same name inside this function.
    map = folium.Map(location=[user_lat, user_lon], zoom_start=12)
    folium.Marker(
        [user_lat, user_lon],
        popup=f"You are here: {city}, {country}",
        icon=folium.Icon(color='red', icon='user')
    ).add_to(map)

    # Fetch doctors
    all_docs = get_doctor_links(city.lower(), speciality.lower())

    for url in all_docs:
        try:
            doc = extract_doctor_profile(url)
            if not doc.get("map_position"):
                continue

            # map_position is "<lat>,<lon>" once the Maps URL prefix is stripped.
            lat_str, lon_str = doc['map_position'].replace('https://www.google.com/maps/dir/','').split(',')
            lat, lon = float(lat_str), float(lon_str)

            # Calculate distance to user
            distance_km = geodesic((user_lat, user_lon), (lat, lon)).km
            if distance_km > 4:
                continue  # Skip doctors farther than 4km

            # Build popup
            img_html = f"<img src='{doc['img']}' width='100' height='100'><br>" if doc.get("img") else ""
            phone_html = "<br>".join(doc.get("phone_numbers", []))
            popup_html = f"""
            {img_html}
            <b>{doc['name']}</b><br>
            <i>{doc['speciality']}</i><br>
            <b>Address:</b> {doc['address']}<br>
            <b>Phone:</b><br>{phone_html}
            <br><b>Distance:</b> {distance_km:.2f} km
            """

            iframe = IFrame(popup_html, width=250, height=250)
            popup = folium.Popup(iframe, max_width=300)

            folium.Marker(
                [lat, lon],
                popup=popup,
                icon=folium.Icon(color='blue', icon='plus')
            ).add_to(map)

        except Exception as e:
            # Best-effort: one bad profile page must not abort the whole map.
            print(f"Error processing {url}: {e}")

    # Save map
    map.save("map.html")
app/utils/models.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pydantic import BaseModel
2
+ from typing import Optional
3
+
4
class SymptomAnalysisRequest(BaseModel):
    """Body of POST /api/analysis/analyze."""
    symptoms: str                     # free-text symptom description
    language: Optional[str] = "en"    # not read by the analysis route in this codebase
    detailed: Optional[bool] = False  # not read by the analysis route in this codebase

# Pydantic model: only the diagnoses dictionary
class AnalysisResult(BaseModel):
    """Ranked diagnoses returned by the analysis endpoint."""
    diagnoses: dict  # disease -> confidence (0.0 to 1.0)

class DiseaseInfo(BaseModel):
    """Structured disease information produced by the info RAG chain."""
    infos: dict  # section name (e.g. 'Symptoms') -> content

class DiseaseInput(BaseModel):
    """Body of POST /api/infos/infos."""
    disease_name: str
    language: Optional[str] = "en"  # not read by the infos route in this codebase

class ChatRequest(BaseModel):
    """Body of POST /api/chatbot/chat."""
    question: str
app/utils/pdf_process.py ADDED
@@ -0,0 +1,243 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from reportlab.platypus import Frame, NextPageTemplate, PageTemplate, BaseDocTemplate, Paragraph, Image, Spacer,Paragraph
2
+ from reportlab.lib.units import cm
3
+ from reportlab.lib.styles import ParagraphStyle, getSampleStyleSheet
4
+ from reportlab.lib.enums import TA_JUSTIFY
5
+ from reportlab.lib.pagesizes import letter
6
+ from reportlab.lib import colors
7
+ from datetime import datetime
8
+
9
def generate_pdf_report(disease_info, filename="report.pdf")->None:
    """Generates a scientific article-style PDF report with two-column layout and a logo.

    Args:
        disease_info: dict keyed by section names ('Title', 'Overview',
            'Symptoms', ...) whose values may be str, list or dict.
        filename: output path for the generated PDF.
    """

    # Custom styles
    styles = getSampleStyleSheet()

    # Add custom styles
    styles.add(ParagraphStyle(
        name='TitleStyle',
        parent=styles['Heading1'],
        fontSize=16,
        leading=20,
        alignment=1,  # Center
        spaceAfter=20,
        textColor=colors.darkblue
    ))

    styles.add(ParagraphStyle(
        name='AuthorStyle',
        parent=styles['Heading3'],
        fontSize=10,
        leading=12,
        alignment=1,
        spaceAfter=20,
        textColor=colors.darkgrey
    ))

    styles.add(ParagraphStyle(
        name='AbstractStyle',
        parent=styles['BodyText'],
        fontSize=10,
        leading=12,
        alignment=TA_JUSTIFY,
        backColor=colors.lightgrey,
        borderPadding=5,
        spaceAfter=20
    ))

    styles.add(ParagraphStyle(
        name='SectionHeader',
        parent=styles['Heading2'],
        fontSize=12,
        leading=14,
        spaceBefore=15,
        spaceAfter=10,
        textColor=colors.darkblue,
        underlineWidth=1,
        underlineColor=colors.darkblue,
        underlineOffset=-5
    ))

    styles.add(ParagraphStyle(
        name='LeftColumn',
        parent=styles['BodyText'],
        fontSize=10,
        leading=12,
        alignment=TA_JUSTIFY,
        leftIndent=0,
        rightIndent=5,
        spaceAfter=7
    ))

    styles.add(ParagraphStyle(
        name='RightColumn',
        parent=styles['BodyText'],
        fontSize=10,
        leading=12,
        alignment=TA_JUSTIFY,
        leftIndent=5,
        rightIndent=0,
        spaceAfter=7
    ))

    styles.add(ParagraphStyle(
        name='BulletPoint',
        parent=styles['BodyText'],
        fontSize=10,
        leading=12,
        leftIndent=15,
        bulletIndent=0,
        spaceAfter=3,
        bulletFontName='Symbol',
        bulletFontSize=8
    ))

    styles.add(ParagraphStyle(
        name='Reference',
        parent=styles['Italic'],
        fontSize=8,
        leading=10,
        textColor=colors.darkgrey,
        spaceBefore=15
    ))

    # Create document with two columns
    class TwoColumnDocTemplate(BaseDocTemplate):
        # Document template with two side-by-side frames separated by a 1cm gutter.
        def __init__(self, filename, **kw):
            BaseDocTemplate.__init__(self, filename, **kw)
            # Calculate column widths
            page_width = self.pagesize[0] - 2*self.leftMargin
            col_width = (page_width - 1*cm) / 2  # 1cm gutter

            # First page template with title
            first_page = PageTemplate(id='FirstPage',
                frames=[
                    Frame(self.leftMargin, self.bottomMargin,
                          col_width, self.height,
                          id='leftCol'),
                    Frame(self.leftMargin + col_width + 1*cm,
                          self.bottomMargin,
                          col_width, self.height,
                          id='rightCol')
                ])
            self.addPageTemplates(first_page)

            # Other pages template
            other_pages = PageTemplate(id='OtherPages',
                frames=[
                    Frame(self.leftMargin, self.bottomMargin,
                          col_width, self.height,
                          id='leftCol2'),
                    Frame(self.leftMargin + col_width + 1*cm,
                          self.bottomMargin,
                          col_width, self.height,
                          id='rightCol2')
                ])
            self.addPageTemplates(other_pages)

    doc = TwoColumnDocTemplate(filename,
                               pagesize=letter,
                               leftMargin=2*cm,
                               rightMargin=2*cm,
                               topMargin=2*cm,
                               bottomMargin=2*cm)

    story = []

    # Add logo at the top (centered)
    try:
        logo = Image('app/data/logo_platform.jpg', width=6*cm, height=2*cm)
        logo.hAlign = 'CENTER'
        story.append(logo)
        story.append(Spacer(1, 0.3*cm))
    except Exception as e:
        pass  # If logo not found, skip

    # Title and authors
    title = Paragraph(disease_info.get('Title', 'Medical Condition Report'), styles['TitleStyle'])
    authors = Paragraph("Generated by AIHealthCheck AI Assistant", styles['AuthorStyle'])
    date = Paragraph(datetime.now().strftime("%B %d, %Y"), styles['AuthorStyle'])

    story.append(title)
    story.append(authors)
    story.append(date)
    story.append(NextPageTemplate('OtherPages'))  # Switch to two-column layout

    # Abstract
    abstract_text = f"<b>Abstract</b><br/><br/>{disease_info.get('Overview', 'No overview available.')}"
    abstract = Paragraph(abstract_text, styles['AbstractStyle'])
    story.append(abstract)

    # Function to format content

    def format_content(text, styles):
        # Normalize a section value (str, list or dict) into Paragraph flowables.
        if not text:
            return []

        # If it's a list, return each item as a bullet paragraph
        if isinstance(text, list):
            return [Paragraph(f"• {item}", styles) for item in text]

        # If it's a dict, render each key-value pair
        if isinstance(text, dict):
            items = []
            for key, value in text.items():
                if isinstance(value, list):
                    items.append(Paragraph(f"<b>{key}:</b>", styles))
                    items.extend([Paragraph(f"• {v}", styles) for v in value])
                else:
                    items.append(Paragraph(f"• <b>{key}:</b> {value}", styles))
            return items

        # Otherwise, treat it as a simple paragraph
        return [Paragraph(text, styles)]



    # Organize content into left and right columns
    left_column_content = [
        ('Symptoms', disease_info.get('Symptoms')),
        ('Causes', disease_info.get('Causes')),
        ('Risk Factors', disease_info.get('Risk factors')),
        ('Complications', disease_info.get('Complications'))
    ]

    right_column_content = [
        ('Diagnosis', disease_info.get('Diagnosis')),
        ('Treatment', disease_info.get('Treatment')),
        ('Prevention', disease_info.get('Prevention')),
        ('When to See a Doctor', disease_info.get('When to see a doctor')),
        ('Lifestyle and Home Remedies', disease_info.get('Lifestyle and home remedies'))
    ]

    # Add left column content
    for section, content in left_column_content:
        if content:
            story.append(Paragraph(section, styles['SectionHeader']))
            formatted = format_content(content, styles['LeftColumn'])
            if isinstance(formatted, list):
                story.extend(formatted)
            else:
                story.append(formatted)

    # Switch to right column
    story.append(NextPageTemplate('OtherPages'))

    # Add right column content
    for section, content in right_column_content:
        if content:
            story.append(Paragraph(section, styles['SectionHeader']))
            formatted = format_content(content, styles['RightColumn'])
            if isinstance(formatted, list):
                story.extend(formatted)
            else:
                story.append(formatted)

    # Add references
    story.append(Paragraph("Medical Recommendations", styles['SectionHeader']))
    story.append(Paragraph(disease_info.get('Medical Recommendation', 'No medical recommendations available.'), styles['LeftColumn']))
    story.append(Paragraph("References", styles['SectionHeader']))
    story.append(Paragraph("1. Mayo Clinic Medical References", styles['Reference']))

    # Build the PDF
    doc.build(story)
+
app/utils/rag.py ADDED
@@ -0,0 +1,128 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ from langchain.prompts import PromptTemplate
3
+ from langchain.chains import RetrievalQA
4
+ from langchain.prompts import ChatPromptTemplate
5
+ import warnings
6
+ from langchain.memory import ConversationBufferMemory
7
+ from langchain.chains import ConversationalRetrievalChain, LLMChain, StuffDocumentsChain
8
+ warnings.filterwarnings("ignore")
9
+ import os
10
+
11
+
12
def get_chain_infos(llm, vectorstore):
    """Build the RetrievalQA chain used by /api/infos to summarize a
    disease into a structured dict keyed by section names.
    """
    # Prompt templates
    # NOTE(review): only {context} is declared here while RetrievalQA also
    # supplies the query as {question} — confirm the chain tolerates a
    # prompt that omits that variable.
    prompt_template = ChatPromptTemplate.from_template("""
    You are a knowledgeable and precise medical assistant. Your task is to extract and summarize key information
    from the raw disease content into a structured, clean, and medically accurate Python dictionary format with the following keys:
    ['Title', 'Overview', 'Symptoms', 'Causes', 'Risk factors', 'Complications', 'Prevention',
    'When to see a doctor', 'Diagnosis', 'Treatment', 'Lifestyle and home remedies', 'Medical Recommendation']

    Disease Raw Info:
    {context}

    Guidelines:
    - Provide concise, factual, and medically sound content for each field.
    - For 'Medical Recommendation':
    • Give the most accurate and actionable advice based on the disease context.
    • If medical consultation is required, specify the **exact medical specialty** (e.g., cardiologist, neurologist).
    • If home care is sufficient, describe **precise steps or remedies** clearly and briefly.
    • Keep it **tailored, specific, and informative**—avoid vague generalities.
    • When applicable, be brief without sacrificing clarity or accuracy.

    Output:
    Return only a valid JSON-like Python dictionary containing the summarized disease information under each specified key. Do not include any extra commentary or explanation.
    """)
    # Initialize the RetrievalQA chain
    info_chain = RetrievalQA.from_chain_type(
        llm=llm,
        retriever=vectorstore.as_retriever(search_kwargs={"k": 1}),  # single best document
        chain_type="stuff",  # or "map_reduce", etc. depending on what you want
        chain_type_kwargs={"prompt": prompt_template},
        return_source_documents=False
    )

    return info_chain
45
+
46
def get_chain_disease(llm, vectorstore):
    """Build the RetrievalQA chain used by /api/analysis: given symptoms,
    the model must reply with a bare JSON array of {disease, probability}
    objects (parsed downstream by process_rag_response).
    """
    # Double braces escape literal JSON braces inside the template.
    prompt = PromptTemplate(
        input_variables=["context", "question"],
        template="""
    You are a medical diagnosis assistant.

    Use the context below to identify which disease best matches the given symptoms.

    Return your response in this format as a JSON array of objects WITHOUT any additional text:
    [
    {{
    "disease": "Disease Name",
    "probability": 87
    }},
    {{
    "disease": "Other Likely Disease",
    "probability": 13
    }}
    ]

    ### Context:
    {context}

    ### Symptoms:
    {question}
    """
    )

    # RAG Chains
    diagnosis_chain = RetrievalQA.from_chain_type(
        llm=llm,
        retriever=vectorstore.as_retriever(search_kwargs={"k": 3}),  # top-3 context docs
        chain_type_kwargs={"prompt": prompt},
        return_source_documents=False
    )
    return diagnosis_chain
83
+
84
def get_chain_chat(llm, vectorstore):
    """Build the ConversationalRetrievalChain behind /api/chatbot/chat.

    Keeps an in-memory conversation buffer, condenses follow-up questions
    into standalone ones, and answers from retrieved context.

    NOTE(review): the memory lives inside this single chain instance, so
    chat history is shared by every caller of the same app process.
    """
    # Memory
    memory = ConversationBufferMemory(
        memory_key="chat_history",
        return_messages=True
    )

    # Prompts
    answer_prompt_template = ChatPromptTemplate.from_template("""
    You are a bilingual knowledgeable and precise medical assistant fluent in both English and French. Your task is to provide accurate and concise answers to medical queries.
    Detect the language of the query (English or French) and respond in the same language.
    When answering, please ensure that your response is clear with no extra explanation. If the question is ambiguous or requires further clarification, ask for more details.

    Context: {context}
    Question: {question}
    Answer:
    """)

    qa_llm_chain = LLMChain(llm=llm, prompt=answer_prompt_template)

    # Stuff all retrieved documents into the {context} slot of the QA prompt.
    combine_docs_chain = StuffDocumentsChain(
        llm_chain=qa_llm_chain,
        document_variable_name="context"
    )

    condense_question_prompt = PromptTemplate.from_template("""
    Given the following conversation and a follow-up question, rephrase the follow-up question to be a standalone question.

    Chat History:
    {chat_history}
    Follow-Up Input: {question}
    Standalone question:
    """)

    question_generator = LLMChain(llm=llm, prompt=condense_question_prompt)

    # Conversational chain
    chat_chain = ConversationalRetrievalChain(
        retriever=vectorstore.as_retriever(search_kwargs={"k": 2}),
        memory=memory,
        question_generator=question_generator,
        combine_docs_chain=combine_docs_chain,
        return_source_documents=False
    )
    return chat_chain
report/report.pdf ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ %PDF-1.4
2
+ %���� ReportLab Generated PDF document http://www.reportlab.com
3
+ 1 0 obj
4
+ <<
5
+ /F1 2 0 R /F2 3 0 R /F3 4 0 R /F4 5 0 R
6
+ >>
7
+ endobj
8
+ 2 0 obj
9
+ <<
10
+ /BaseFont /Helvetica /Encoding /WinAnsiEncoding /Name /F1 /Subtype /Type1 /Type /Font
11
+ >>
12
+ endobj
13
+ 3 0 obj
14
+ <<
15
+ /BaseFont /Helvetica-Bold /Encoding /WinAnsiEncoding /Name /F2 /Subtype /Type1 /Type /Font
16
+ >>
17
+ endobj
18
+ 4 0 obj
19
+ <<
20
+ /BaseFont /Helvetica-BoldOblique /Encoding /WinAnsiEncoding /Name /F3 /Subtype /Type1 /Type /Font
21
+ >>
22
+ endobj
23
+ 5 0 obj
24
+ <<
25
+ /BaseFont /Helvetica-Oblique /Encoding /WinAnsiEncoding /Name /F4 /Subtype /Type1 /Type /Font
26
+ >>
27
+ endobj
28
+ 6 0 obj
29
+ <<
30
+ /Contents 10 0 R /MediaBox [ 0 0 612 792 ] /Parent 9 0 R /Resources <<
31
+ /Font 1 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ]
32
+ >> /Rotate 0 /Trans <<
33
+
34
+ >>
35
+ /Type /Page
36
+ >>
37
+ endobj
38
+ 7 0 obj
39
+ <<
40
+ /PageMode /UseNone /Pages 9 0 R /Type /Catalog
41
+ >>
42
+ endobj
43
+ 8 0 obj
44
+ <<
45
+ /Author (\(anonymous\)) /CreationDate (D:20250524213346+01'00') /Creator (\(unspecified\)) /Keywords () /ModDate (D:20250524213346+01'00') /Producer (ReportLab PDF Library - www.reportlab.com)
46
+ /Subject (\(unspecified\)) /Title (\(anonymous\)) /Trapped /False
47
+ >>
48
+ endobj
49
+ 9 0 obj
50
+ <<
51
+ /Count 1 /Kids [ 6 0 R ] /Type /Pages
52
+ >>
53
+ endobj
54
+ 10 0 obj
55
+ <<
56
+ /Filter [ /ASCII85Decode /FlateDecode ] /Length 2091
57
+ >>
58
+ stream
59
+ Gatm<9lo&I&A@sBlqtkq%)Hj'*BU5Vj"tYJf$bLD?kO)LPCs"!9%75*"\D94D+RQlO_n5GEo4cTnTGu[f_?Z8hkU6s5DOG)#nR@($Qu&&_>G'4bV-\;ORC9,Z%CVb$&!J<\TUO555\T6C&+?=`t?DS"i_^tckrRoYRZa[LNRDR1if]XNa`g>T;OC=9QM[DI09(j@/^pRrND@"+8C/971b4"=BIlIKUr^lpE:cZA`NotRtWnl&]'hufeN>_Pt]V`V=;!Gb>1'Uk#HSiZ/c3U_51%7<OAiNN;L(+@I3g)MK*0:P/ssLDSYe>Qsdtl-aIs@@9UD$UD7JY"GZA9X+ZnEIiR(JM!N&G>T4r@`S2_YO#>=$MiMXQ!=aCfH-%@+/g"(>C!pUuWP%?pc!:9PETjn8k!+"m!aGb['pVbgMC4L+`Md6r;+ajX8jBL/PG2AqgD&<:IObafa*!hI<Q<sZ=%n0I!&#->d<?<lIBRoBR'PXKV/]P!X_H'Qr)!*=Gue<C@G"#4@`9"L""dGJp#GJHA4nOI22-_7MO-uWpjeTE7@kNEDA-[!ekEM<1A[fM:GSO)aQBcZCRXFU.?oe6*$93;&)n.I(T5'<&/S3/?=B#HEXG$UQ%@9^So%8?gP1s?.T9A$Napacj99uGk71RSCY,8)RH/LJd<MWE=ZkXlRoPWZ]*!OU<iKdd<++CN]+)156EM-$Z<h!4oJ.lNhgCh`QAD8CA]iG-JB=6,#qrQdKPWE+8uA"5=e&'dSq&#;r<a%$[MKQDV/g`OeHqnOgW?p!Z)U3c%$8,q-JF5/88fkO*_7-b%5&*Y3\Z))m4hGpH8T:77ts#d$go7App+M0EST1aPsIl<RUda1TfWJBGMLZ+)Sl.)(mr&@C)T8`"!H2;"8'-)8^:.7pJQ2<b77!YW%4E$K;*DgK,P8,>Ej%0`83!9Vaf[ROC9NObGcsZb3NNk78VSW-HoXs;n.1JLOQIeZClK[nWpc5f>3oZl-G,D]uICurcD;#<Q=V.nldp*G*fZ-(l/t$]hpfhWYP(L[T4O\1/6ctA>Y4X>,FI+$)9HgRT28BFN`9>=\u+q\M66_QKul,p2+SmpT.*_e("%a"2\J'VVH)=NcE_MGE6;]7F`01!Ob3bjfu'C93NY]"!hfLaT!i,9KZT!a<>Oml2*W=:H1#*^l+r,VQDbV@`<'6<pf;LIk-@(<]QICAQC[^4bPo#C$O9nE-Pdp5HOgd'3HNu=Yh>$2q8MI/&Zau3(Og5DfcgMLdu<>060WW=AYU@no2_;dDAc8i*$YaW4[KYFkI4f%rVkarSa>-Y@Odr?'YQJRfO3@p:9,hr<''Udr3kVQhS$%kXh/AFIt",,)&Y-C89<VdM5R*__d=ZSS9_2B;$&/Pc'a[FaCH@*UY5cFnj@g&C4#&>X'7!d45;In!&k-#?nPN\RFj6*p+*[6<Eh^r[cmnn6Mfd''4hBX5`7-&WKQC4.og6fe[^dO1]:H7q7@8IYBs8a`8O..Y#Q^$ce,@;B&-"0Bjj6RqGPJQ>i?/)Mb2DJ6*\Rglp\^HX?.72XE8d(S>FYrimL>-i3&H&K`oV"rps:Xg:"BiE21YEiKFRWa@C]!uCkb)f_*r2PtSM1Vm6:%!3j-Y8L4?-LN=TEL[=rIQ]i2k7b5L8M7%0Qm^NrP>gW(./&Fd(X:[email protected](pPkg6)!Lr]W]YO8JGd_gW!F6I2n@W:$XNFARBVi,'bB0q\(\K*@$gm0YKOMDK^**jPe>F+jt:b2h6.BXPm)S2bQ_7lo,E@o'Mj0CjFTo-Z[D[:]h3(RWNd9\'TK"5hH@hb5,R'REF1L,;^IQ],^C[sWatBToblmVYG/i%*953+CWci(eB/$oqL7?P0i"br2PsB$09>"qU\ejQ0S7VRgu#[email 
protected]>`Je?^l12[UQd2G;RG7Mn/d[S&c@)AsS:qF!^AJETkW1CCd-D[Q48X&.@o[bYr29e_AK8s4b!HLC*6!p>tW.D"jB!<&]()L:u_*MP4OrB=\[oL#s-Eh<.N9cH;LJbOXauBEq./$IuMUD.]pR:<oRsH@C$Dq_5Rl^3VM,_o7*`\QR#~>endstream
60
+ endobj
61
+ xref
62
+ 0 11
63
+ 0000000000 65535 f
64
+ 0000000073 00000 n
65
+ 0000000134 00000 n
66
+ 0000000241 00000 n
67
+ 0000000353 00000 n
68
+ 0000000472 00000 n
69
+ 0000000587 00000 n
70
+ 0000000781 00000 n
71
+ 0000000849 00000 n
72
+ 0000001132 00000 n
73
+ 0000001191 00000 n
74
+ trailer
75
+ <<
76
+ /ID
77
+ [<6b5b3613f29d0411c75de7a4437a5a87><6b5b3613f29d0411c75de7a4437a5a87>]
78
+ % ReportLab generated PDF document -- digest (http://www.reportlab.com)
79
+
80
+ /Info 8 0 R
81
+ /Root 7 0 R
82
+ /Size 11
83
+ >>
84
+ startxref
85
+ 3374
86
+ %%EOF
requirements.txt ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ fastapi
2
+ pydantic<2.0.0
3
+ uvicorn
4
+ langchain
5
+ faiss-cpu
6
+ sentence-transformers
7
+ openai
8
+ reportlab
9
+ geopy
10
+ folium
11
+ geocoder
12
+ selenium
13
+ bs4