rivapereira123 commited on
Commit
28808c0
·
verified ·
1 Parent(s): 09cb14e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +917 -84
app.py CHANGED
@@ -1,97 +1,930 @@
 
1
  import os
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  import gradio as gr
 
 
 
 
3
  import torch
 
 
 
 
 
 
 
 
4
  from llama_index.core import (
5
- SimpleDirectoryReader,
6
- VectorStoreIndex,
 
7
  StorageContext,
8
- Settings
9
  )
10
- from llama_index.embeddings.huggingface import HuggingFaceEmbedding
11
  from llama_index.vector_stores.faiss import FaissVectorStore
 
12
  from llama_index.llms.huggingface import HuggingFaceLLM
13
- from llama_index.core.node_parser import SentenceSplitter
14
- from transformers import AutoTokenizer
15
- import faiss
16
 
17
- # ====== Configuration ======
18
- PDF_DIR = "./data"
19
- INDEX_SAVE_PATH = "./saved_index"
20
- CHUNK_SIZE = 512
21
- CHUNK_OVERLAP = 50
22
- EMBED_MODEL = "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
23
- LLM_MODEL = "microsoft/Phi-3-mini-4k-instruct" # 3.8B parameter model
24
-
25
- # ====== Initialize Local Models ======
26
- # Embedding model (runs offline)
27
- Settings.embed_model = HuggingFaceEmbedding(model_name=EMBED_MODEL)
28
-
29
- # Local LLM with 4-bit quantization
30
- tokenizer = AutoTokenizer.from_pretrained(LLM_MODEL)
31
- Settings.llm = HuggingFaceLLM(
32
- model_name=LLM_MODEL,
33
- tokenizer_name=LLM_MODEL,
34
- device_map="auto",
35
- model_kwargs={
36
- "torch_dtype": torch.float16,
37
- "trust_remote_code": True
38
- }
39
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
 
41
- # ====== Node Parser ======
42
- parser = SentenceSplitter(chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP)
43
-
44
- # ====== Build or Load Index ======
45
- if os.path.exists(INDEX_SAVE_PATH):
46
- # Load existing index
47
- vector_store = FaissVectorStore.from_persist_dir(INDEX_SAVE_PATH)
48
- storage_context = StorageContext.from_defaults(
49
- vector_store=vector_store,
50
- persist_dir=INDEX_SAVE_PATH
51
- )
52
- index = VectorStoreIndex.load(storage_context=storage_context)
53
- else:
54
- # Create new index
55
- if not os.path.exists(PDF_DIR):
56
- raise FileNotFoundError(f"Add medical PDFs to {PDF_DIR} directory first")
57
-
58
- documents = SimpleDirectoryReader(PDF_DIR).load_data()
59
- nodes = parser.get_nodes_from_documents(documents)
60
-
61
- # Create FAISS index
62
- dimension = 384 # Match MiniLM embedding size
63
- faiss_index = faiss.IndexFlatL2(dimension)
64
- vector_store = FaissVectorStore(faiss_index=faiss_index)
65
-
66
- storage_context = StorageContext.from_defaults(vector_store=vector_store)
67
- index = VectorStoreIndex(nodes, storage_context=storage_context)
68
-
69
- # Save for offline use
70
- index.storage_context.persist(persist_dir=INDEX_SAVE_PATH)
71
-
72
- # ====== Safety Layers ======
73
- def validate_response(response: str) -> str:
74
- """Implements WHO protocol constraints"""
75
- if len(response.split('\n')) > 6:
76
- return "⚠️ Protocol too complex - must be <6 steps\n\n" + response
77
- uncertainty_phrases = ["I think", "maybe", "not sure", "غير متأكد"]
78
- if any(phrase in response for phrase in uncertainty_phrases):
79
- return "⚠️ Consult supervisor - uncertain response\n\n" + response
80
- return response
81
-
82
- # ====== Query Engine ======
83
- query_engine = index.as_query_engine()
84
-
85
- # ====== Gradio Interface ======
86
- def ask_question(query):
87
- response = str(query_engine.query(query))
88
- return validate_response(response)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
 
90
  if __name__ == "__main__":
91
- gr.Interface(
92
- fn=ask_question,
93
- inputs=gr.Textbox(lines=2, placeholder="Ask a medical question..."),
94
- outputs="text",
95
- title="🩺 Gaza Field Medic Assistant (Offline)",
96
- description="WHO protocols • No internet required • Arabic/English"
97
- ).launch(server_name="0.0.0.0")
 
1
+
2
  import os
3
+ import sys
4
+ import json
5
+ import logging
6
+ import warnings
7
+ from pathlib import Path
8
+ from typing import List, Dict, Any, Optional, Tuple
9
+ import hashlib
10
+ import pickle
11
+ from datetime import datetime
12
+
13
+ # Suppress warnings for cleaner output
14
+ warnings.filterwarnings("ignore")
15
+
16
+ # Core dependencies
17
  import gradio as gr
18
+ import numpy as np
19
+ import pandas as pd
20
+ from sentence_transformers import SentenceTransformer
21
+ import faiss
22
  import torch
23
+ from transformers import (
24
+ AutoTokenizer,
25
+ AutoModelForCausalLM,
26
+ pipeline,
27
+ BitsAndBytesConfig
28
+ )
29
+
30
+ # Document processing
31
  from llama_index.core import (
32
+ Document,
33
+ VectorStoreIndex,
34
+ ServiceContext,
35
  StorageContext,
36
+ load_index_from_storage
37
  )
38
+ from llama_index.core.node_parser import SentenceSplitter
39
  from llama_index.vector_stores.faiss import FaissVectorStore
40
+ from llama_index.embeddings.huggingface import HuggingFaceEmbedding
41
  from llama_index.llms.huggingface import HuggingFaceLLM
 
 
 
42
 
43
+ # PDF processing
44
+ import PyPDF2
45
+ from io import BytesIO
46
+
47
+ # Medical knowledge validation
48
+ import re
49
+ from difflib import SequenceMatcher
50
+
51
+ # Configure logging
52
+ logging.basicConfig(
53
+ level=logging.INFO,
54
+ format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
 
 
 
 
 
 
 
 
 
 
55
  )
56
+ logger = logging.getLogger(__name__)
57
+
58
+ class MedicalFactChecker:
59
+ """
60
+ Medical fact checking and hallucination detection system.
61
+ Validates generated responses against authoritative medical sources.
62
+ """
63
+
64
+ def __init__(self):
65
+ self.medical_facts = self._load_medical_facts()
66
+ self.contraindications = self._load_contraindications()
67
+ self.dosage_patterns = self._compile_dosage_patterns()
68
+
69
+ def _load_medical_facts(self) -> Dict[str, Any]:
70
+ """Load verified medical facts from authoritative sources."""
71
+ return {
72
+ "burn_treatment": {
73
+ "immediate_care": [
74
+ "Remove from heat source immediately",
75
+ "Cool with clean water for 10-20 minutes",
76
+ "Remove jewelry and loose clothing before swelling",
77
+ "Cover with clean, dry cloth",
78
+ "Do not apply ice, butter, or oils"
79
+ ],
80
+ "severity_assessment": {
81
+ "first_degree": "Affects only outer layer of skin, red and painful",
82
+ "second_degree": "Affects outer and underlying layer, blisters form",
83
+ "third_degree": "Affects all layers, may appear white or charred"
84
+ }
85
+ },
86
+ "wound_care": {
87
+ "cleaning": [
88
+ "Clean hands before treating wounds",
89
+ "Rinse wound with clean water",
90
+ "Apply gentle pressure to stop bleeding",
91
+ "Cover with sterile bandage"
92
+ ],
93
+ "infection_signs": [
94
+ "Increased pain, redness, swelling",
95
+ "Warmth around wound",
96
+ "Pus or unusual discharge",
97
+ "Red streaking from wound",
98
+ "Fever"
99
+ ]
100
+ },
101
+ "emergency_priorities": {
102
+ "abc_assessment": [
103
+ "Airway - ensure clear and open",
104
+ "Breathing - check for normal breathing",
105
+ "Circulation - check pulse and control bleeding"
106
+ ]
107
+ }
108
+ }
109
+
110
+ def _load_contraindications(self) -> Dict[str, List[str]]:
111
+ """Load medical contraindications and dangerous practices."""
112
+ return {
113
+ "burns": [
114
+ "Do not apply ice directly to burns",
115
+ "Do not use butter, oils, or home remedies",
116
+ "Do not break blisters",
117
+ "Do not remove clothing stuck to burn"
118
+ ],
119
+ "wounds": [
120
+ "Do not remove embedded objects",
121
+ "Do not use hydrogen peroxide on deep wounds",
122
+ "Do not ignore signs of infection"
123
+ ],
124
+ "general": [
125
+ "Do not move suspected spinal injury patients unnecessarily",
126
+ "Do not give food or water to unconscious patients",
127
+ "Do not leave patients unattended if condition is serious"
128
+ ]
129
+ }
130
+
131
+ def _compile_dosage_patterns(self) -> List[re.Pattern]:
132
+ """Compile regex patterns for detecting medication dosages."""
133
+ patterns = [
134
+ r'\d+\s*mg\b', # milligrams
135
+ r'\d+\s*g\b', # grams
136
+ r'\d+\s*ml\b', # milliliters
137
+ r'\d+\s*tablets?\b', # tablets
138
+ r'\d+\s*times?\s+(?:per\s+)?day\b', # frequency
139
+ r'every\s+\d+\s+hours?\b' # intervals
140
+ ]
141
+ return [re.compile(pattern, re.IGNORECASE) for pattern in patterns]
142
+
143
+ def check_medical_accuracy(self, response: str, context: str) -> Dict[str, Any]:
144
+ """
145
+ Check medical accuracy of generated response against context and facts.
146
+
147
+ Args:
148
+ response: Generated response text
149
+ context: Retrieved context from knowledge base
150
+
151
+ Returns:
152
+ Dictionary with accuracy assessment and confidence score
153
+ """
154
+ accuracy_score = 0.0
155
+ issues = []
156
+ warnings = []
157
+
158
+ # Check for contraindications
159
+ contraindication_issues = self._check_contraindications(response)
160
+ if contraindication_issues:
161
+ issues.extend(contraindication_issues)
162
+ accuracy_score -= 0.3
163
+
164
+ # Check context alignment
165
+ context_similarity = self._calculate_context_similarity(response, context)
166
+ if context_similarity < 0.7:
167
+ warnings.append(f"Low context similarity: {context_similarity:.2f}")
168
+ accuracy_score -= 0.2
169
+
170
+ # Check for unsupported medical claims
171
+ unsupported_claims = self._detect_unsupported_claims(response, context)
172
+ if unsupported_claims:
173
+ issues.extend(unsupported_claims)
174
+ accuracy_score -= 0.4
175
+
176
+ # Check dosage information if present
177
+ dosage_issues = self._validate_dosages(response)
178
+ if dosage_issues:
179
+ warnings.extend(dosage_issues)
180
+ accuracy_score -= 0.1
181
+
182
+ # Calculate final confidence score
183
+ confidence_score = max(0.0, min(1.0, 0.8 + accuracy_score))
184
+
185
+ return {
186
+ "confidence_score": confidence_score,
187
+ "issues": issues,
188
+ "warnings": warnings,
189
+ "context_similarity": context_similarity,
190
+ "is_safe": len(issues) == 0 and confidence_score > 0.6
191
+ }
192
+
193
+ def _check_contraindications(self, response: str) -> List[str]:
194
+ """Check for dangerous medical advice in response."""
195
+ issues = []
196
+ response_lower = response.lower()
197
+
198
+ for category, contraindications in self.contraindications.items():
199
+ for contraindication in contraindications:
200
+ # Simple keyword matching for contraindications
201
+ keywords = contraindication.lower().split()
202
+ if len(keywords) > 2: # Check for phrase presence
203
+ key_phrase = " ".join(keywords[2:]) # Remove "do not"
204
+ if key_phrase in response_lower and "do not" not in response_lower:
205
+ issues.append(f"Potential contraindication detected: {contraindication}")
206
+
207
+ return issues
208
+
209
+ def _calculate_context_similarity(self, response: str, context: str) -> float:
210
+ """Calculate semantic similarity between response and context."""
211
+ if not context or not response:
212
+ return 0.0
213
+
214
+ # Simple similarity based on common medical terms
215
+ response_words = set(response.lower().split())
216
+ context_words = set(context.lower().split())
217
+
218
+ if not response_words or not context_words:
219
+ return 0.0
220
+
221
+ intersection = response_words.intersection(context_words)
222
+ union = response_words.union(context_words)
223
+
224
+ return len(intersection) / len(union) if union else 0.0
225
+
226
+ def _detect_unsupported_claims(self, response: str, context: str) -> List[str]:
227
+ """Detect medical claims not supported by context."""
228
+ issues = []
229
+
230
+ # Look for definitive medical statements
231
+ definitive_patterns = [
232
+ r'always\s+(?:use|take|apply)',
233
+ r'never\s+(?:use|take|apply)',
234
+ r'will\s+(?:cure|heal|fix)',
235
+ r'guaranteed\s+to',
236
+ r'completely\s+(?:safe|effective)'
237
+ ]
238
+
239
+ for pattern in definitive_patterns:
240
+ if re.search(pattern, response, re.IGNORECASE):
241
+ if not self._claim_supported_by_context(pattern, context):
242
+ issues.append(f"Unsupported definitive claim detected: {pattern}")
243
+
244
+ return issues
245
+
246
+ def _claim_supported_by_context(self, claim_pattern: str, context: str) -> bool:
247
+ """Check if a claim is supported by the context."""
248
+ # Simplified check - in production, this would be more sophisticated
249
+ return len(context) > 100 # Basic context length check
250
+
251
+ def _validate_dosages(self, response: str) -> List[str]:
252
+ """Validate any dosage information in the response."""
253
+ warnings = []
254
+
255
+ for pattern in self.dosage_patterns:
256
+ matches = pattern.findall(response)
257
+ if matches:
258
+ warnings.append("Dosage information detected - verify with medical professional")
259
+ break
260
+
261
+ return warnings
262
+
263
+ class GazaKnowledgeBase:
264
+ """
265
+ Specialized knowledge base for Gaza medical information.
266
+ Handles document processing, indexing, and retrieval.
267
+ """
268
+
269
+ def __init__(self, data_dir: str = "./data"):
270
+ self.data_dir = Path(data_dir)
271
+ self.embedding_model = None
272
+ self.vector_store = None
273
+ self.index = None
274
+ self.documents = []
275
+
276
+ # Gaza-specific medical priorities
277
+ self.medical_priorities = {
278
+ "trauma": ["gunshot wounds", "blast injuries", "burns", "fractures"],
279
+ "infectious": ["cholera", "dysentery", "respiratory infections"],
280
+ "chronic": ["diabetes", "hypertension", "malnutrition"],
281
+ "emergency": ["cardiac arrest", "severe bleeding", "airway obstruction"]
282
+ }
283
+
284
+ def initialize(self):
285
+ """Initialize the knowledge base with embeddings and vector store."""
286
+ logger.info("Initializing Gaza Knowledge Base...")
287
+
288
+ # Initialize embedding model
289
+ self.embedding_model = SentenceTransformer(
290
+ 'sentence-transformers/all-MiniLM-L6-v2',
291
+ device='cpu' # Use CPU for better compatibility
292
+ )
293
+
294
+ # Load or create vector store
295
+ self._load_or_create_vector_store()
296
+
297
+ logger.info("Knowledge base initialization complete.")
298
+
299
+ def _load_or_create_vector_store(self):
300
+ """Load existing vector store or create new one."""
301
+ vector_store_path = self.data_dir / "vector_store"
302
+
303
+ if vector_store_path.exists():
304
+ logger.info("Loading existing vector store...")
305
+ self._load_vector_store(vector_store_path)
306
+ else:
307
+ logger.info("Creating new vector store...")
308
+ self._create_vector_store()
309
+ self._save_vector_store(vector_store_path)
310
+
311
+ def _create_vector_store(self):
312
+ """Create vector store from documents."""
313
+ # Load documents
314
+ self.documents = self._load_documents()
315
+
316
+ if not self.documents:
317
+ logger.warning("No documents found. Creating empty vector store.")
318
+ # Create empty FAISS index
319
+ dimension = 384 # all-MiniLM-L6-v2 dimension
320
+ self.vector_store = faiss.IndexFlatL2(dimension)
321
+ return
322
+
323
+ # Process documents into chunks
324
+ chunks = self._process_documents(self.documents)
325
+
326
+ # Create embeddings
327
+ embeddings = self._create_embeddings(chunks)
328
+
329
+ # Create FAISS index
330
+ dimension = embeddings.shape[1]
331
+ self.vector_store = faiss.IndexFlatL2(dimension)
332
+ self.vector_store.add(embeddings.astype('float32'))
333
+
334
+ # Store chunk metadata
335
+ self.chunk_metadata = chunks
336
+
337
+ logger.info(f"Created vector store with {len(chunks)} chunks")
338
+
339
+ def _load_documents(self) -> List[Document]:
340
+ """Load medical documents from data directory."""
341
+ documents = []
342
+
343
+ if not self.data_dir.exists():
344
+ logger.warning(f"Data directory {self.data_dir} does not exist")
345
+ return documents
346
+
347
+ # Load PDF files
348
+ for pdf_file in self.data_dir.glob("*.pdf"):
349
+ try:
350
+ doc_text = self._extract_pdf_text(pdf_file)
351
+ if doc_text:
352
+ documents.append(Document(
353
+ text=doc_text,
354
+ metadata={"source": str(pdf_file), "type": "pdf"}
355
+ ))
356
+ logger.info(f"Loaded document: {pdf_file.name}")
357
+ except Exception as e:
358
+ logger.error(f"Error loading {pdf_file}: {e}")
359
+
360
+ # Load text files
361
+ for txt_file in self.data_dir.glob("*.txt"):
362
+ try:
363
+ with open(txt_file, 'r', encoding='utf-8') as f:
364
+ doc_text = f.read()
365
+ documents.append(Document(
366
+ text=doc_text,
367
+ metadata={"source": str(txt_file), "type": "text"}
368
+ ))
369
+ logger.info(f"Loaded document: {txt_file.name}")
370
+ except Exception as e:
371
+ logger.error(f"Error loading {txt_file}: {e}")
372
+
373
+ return documents
374
+
375
+ def _extract_pdf_text(self, pdf_path: Path) -> str:
376
+ """Extract text from PDF file."""
377
+ try:
378
+ with open(pdf_path, 'rb') as file:
379
+ pdf_reader = PyPDF2.PdfReader(file)
380
+ text = ""
381
+ for page in pdf_reader.pages:
382
+ text += page.extract_text() + "\n"
383
+ return text
384
+ except Exception as e:
385
+ logger.error(f"Error extracting text from {pdf_path}: {e}")
386
+ return ""
387
+
388
+ def _process_documents(self, documents: List[Document]) -> List[Dict[str, Any]]:
389
+ """Process documents into chunks with metadata."""
390
+ chunks = []
391
+
392
+ # Initialize sentence splitter
393
+ splitter = SentenceSplitter(
394
+ chunk_size=512,
395
+ chunk_overlap=50
396
+ )
397
+
398
+ for doc in documents:
399
+ # Split document into chunks
400
+ doc_chunks = splitter.split_text(doc.text)
401
+
402
+ for i, chunk_text in enumerate(doc_chunks):
403
+ # Enhance chunk with Gaza-specific medical context
404
+ enhanced_chunk = self._enhance_medical_context(chunk_text)
405
+
406
+ chunks.append({
407
+ "text": enhanced_chunk,
408
+ "original_text": chunk_text,
409
+ "source": doc.metadata.get("source", "unknown"),
410
+ "chunk_id": f"{doc.metadata.get('source', 'unknown')}_{i}",
411
+ "medical_priority": self._assess_medical_priority(chunk_text)
412
+ })
413
+
414
+ return chunks
415
+
416
+ def _enhance_medical_context(self, text: str) -> str:
417
+ """Enhance text with Gaza-specific medical context."""
418
+ # Add context about resource constraints
419
+ if any(term in text.lower() for term in ["treatment", "medication", "supplies"]):
420
+ text += "\n[Gaza Context: Consider resource limitations and alternative treatments when standard supplies are unavailable.]"
421
+
422
+ # Add urgency context for trauma
423
+ if any(term in text.lower() for term in ["bleeding", "wound", "trauma", "injury"]):
424
+ text += "\n[Gaza Context: In conflict situations, prioritize immediate life-saving interventions.]"
425
+
426
+ return text
427
+
428
+ def _assess_medical_priority(self, text: str) -> str:
429
+ """Assess medical priority level of text content."""
430
+ text_lower = text.lower()
431
+
432
+ for priority, keywords in self.medical_priorities.items():
433
+ if any(keyword in text_lower for keyword in keywords):
434
+ return priority
435
+
436
+ return "general"
437
+
438
+ def _create_embeddings(self, chunks: List[Dict[str, Any]]) -> np.ndarray:
439
+ """Create embeddings for text chunks."""
440
+ texts = [chunk["text"] for chunk in chunks]
441
+ embeddings = self.embedding_model.encode(texts, show_progress_bar=True)
442
+ return embeddings
443
+
444
+ def _save_vector_store(self, path: Path):
445
+ """Save vector store and metadata to disk."""
446
+ path.mkdir(parents=True, exist_ok=True)
447
+
448
+ # Save FAISS index
449
+ faiss.write_index(self.vector_store, str(path / "index.faiss"))
450
+
451
+ # Save metadata
452
+ with open(path / "metadata.pkl", 'wb') as f:
453
+ pickle.dump(self.chunk_metadata, f)
454
+
455
+ logger.info(f"Vector store saved to {path}")
456
+
457
+ def _load_vector_store(self, path: Path):
458
+ """Load vector store and metadata from disk."""
459
+ # Load FAISS index
460
+ self.vector_store = faiss.read_index(str(path / "index.faiss"))
461
+
462
+ # Load metadata
463
+ with open(path / "metadata.pkl", 'rb') as f:
464
+ self.chunk_metadata = pickle.load(f)
465
+
466
+ logger.info(f"Vector store loaded from {path}")
467
+
468
+ def search(self, query: str, k: int = 5) -> List[Dict[str, Any]]:
469
+ """Search for relevant medical information."""
470
+ if self.vector_store is None:
471
+ return []
472
+
473
+ # Create query embedding
474
+ query_embedding = self.embedding_model.encode([query])
475
+
476
+ # Search vector store
477
+ scores, indices = self.vector_store.search(
478
+ query_embedding.astype('float32'), k
479
+ )
480
+
481
+ # Prepare results
482
+ results = []
483
+ for score, idx in zip(scores[0], indices[0]):
484
+ if idx < len(self.chunk_metadata):
485
+ chunk = self.chunk_metadata[idx]
486
+ results.append({
487
+ "text": chunk["original_text"],
488
+ "source": chunk["source"],
489
+ "score": float(score),
490
+ "medical_priority": chunk["medical_priority"]
491
+ })
492
+
493
+ return results
494
+
495
+ class GazaRAGSystem:
496
+ """
497
+ Main RAG system for Gaza First Aid Assistant.
498
+ Integrates knowledge base, language model, and safety checks.
499
+ """
500
+
501
+ def __init__(self):
502
+ self.knowledge_base = GazaKnowledgeBase()
503
+ self.fact_checker = MedicalFactChecker()
504
+ self.llm = None
505
+ self.tokenizer = None
506
+
507
+ # System prompts
508
+ self.system_prompt = self._create_system_prompt()
509
+
510
+ def initialize(self):
511
+ """Initialize the RAG system."""
512
+ logger.info("Initializing Gaza RAG System...")
513
+
514
+ # Initialize knowledge base
515
+ self.knowledge_base.initialize()
516
+
517
+ # Initialize language model
518
+ self._initialize_llm()
519
+
520
+ logger.info("RAG system initialization complete.")
521
+
522
+ def _initialize_llm(self):
523
+ """Initialize the language model with optimization for medical use."""
524
+ model_name = "microsoft/Phi-3-mini-4k-instruct"
525
+
526
+ try:
527
+ # Configure quantization for efficiency
528
+ quantization_config = BitsAndBytesConfig(
529
+ load_in_4bit=True,
530
+ bnb_4bit_compute_dtype=torch.float16,
531
+ bnb_4bit_use_double_quant=True,
532
+ bnb_4bit_quant_type="nf4"
533
+ )
534
+
535
+ # Load tokenizer
536
+ self.tokenizer = AutoTokenizer.from_pretrained(
537
+ model_name,
538
+ trust_remote_code=True
539
+ )
540
+
541
+ # Load model
542
+ self.llm = AutoModelForCausalLM.from_pretrained(
543
+ model_name,
544
+ quantization_config=quantization_config,
545
+ device_map="auto",
546
+ trust_remote_code=True,
547
+ torch_dtype=torch.float16
548
+ )
549
+
550
+ logger.info(f"Loaded model: {model_name}")
551
+
552
+ except Exception as e:
553
+ logger.error(f"Error loading model: {e}")
554
+ # Fallback to a simpler model or CPU-only mode
555
+ self._initialize_fallback_llm()
556
+
557
+ def _initialize_fallback_llm(self):
558
+ """Initialize fallback LLM for cases where main model fails."""
559
+ try:
560
+ # Use a smaller, more compatible model
561
+ model_name = "microsoft/DialoGPT-medium"
562
+
563
+ self.tokenizer = AutoTokenizer.from_pretrained(model_name)
564
+ self.llm = AutoModelForCausalLM.from_pretrained(
565
+ model_name,
566
+ torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32
567
+ )
568
+
569
+ logger.info(f"Loaded fallback model: {model_name}")
570
+
571
+ except Exception as e:
572
+ logger.error(f"Error loading fallback model: {e}")
573
+ self.llm = None
574
+ self.tokenizer = None
575
+
576
+ def _create_system_prompt(self) -> str:
577
+ """Create system prompt for medical AI assistant."""
578
+ return """You are a specialized medical AI assistant designed to provide first aid guidance for healthcare workers in Gaza. Your responses must be:
579
+
580
+ 1. MEDICALLY ACCURATE: Base all advice on established medical protocols from WHO, ICRC, and MSF guidelines.
581
+
582
+ 2. RESOURCE-AWARE: Consider the limited medical supplies and infrastructure in Gaza. Suggest alternatives when standard treatments are unavailable.
583
+
584
+ 3. SAFETY-FIRST: Always prioritize patient safety. If uncertain, recommend seeking professional medical attention.
585
+
586
+ 4. CLEAR AND ACTIONABLE: Provide step-by-step instructions that can be followed by healthcare workers under pressure.
587
+
588
+ 5. CONTEXT-APPROPRIATE: Consider the conflict environment and adapt advice accordingly.
589
+
590
+ IMPORTANT SAFETY GUIDELINES:
591
+ - Never provide definitive diagnoses
592
+ - Always recommend professional medical evaluation for serious conditions
593
+ - Clearly state when immediate emergency care is needed
594
+ - Acknowledge limitations of remote medical advice
595
+ - Provide source attribution when possible
596
+
597
+ Remember: You are providing guidance to support medical professionals, not replace them."""
598
+
599
+ def generate_response(self, query: str) -> Dict[str, Any]:
600
+ """Generate response to medical query with safety checks."""
601
+ try:
602
+ # Search knowledge base
603
+ search_results = self.knowledge_base.search(query, k=3)
604
+
605
+ # Prepare context
606
+ context = self._prepare_context(search_results)
607
+
608
+ # Generate response
609
+ response = self._generate_llm_response(query, context)
610
+
611
+ # Perform safety checks
612
+ safety_check = self.fact_checker.check_medical_accuracy(response, context)
613
+
614
+ # Prepare final response
615
+ final_response = self._prepare_final_response(
616
+ query, response, search_results, safety_check
617
+ )
618
+
619
+ return final_response
620
+
621
+ except Exception as e:
622
+ logger.error(f"Error generating response: {e}")
623
+ return self._create_error_response(str(e))
624
+
625
+ def _prepare_context(self, search_results: List[Dict[str, Any]]) -> str:
626
+ """Prepare context from search results."""
627
+ if not search_results:
628
+ return "No specific medical information found in knowledge base."
629
+
630
+ context_parts = []
631
+ for result in search_results:
632
+ context_parts.append(f"Source: {result['source']}")
633
+ context_parts.append(f"Content: {result['text']}")
634
+ context_parts.append("---")
635
+
636
+ return "\n".join(context_parts)
637
+
638
+ def _generate_llm_response(self, query: str, context: str) -> str:
639
+ """Generate response using language model."""
640
+ if self.llm is None or self.tokenizer is None:
641
+ return self._generate_fallback_response(query, context)
642
+
643
+ try:
644
+ # Prepare prompt
645
+ prompt = f"""{self.system_prompt}
646
+
647
+ Context from medical knowledge base:
648
+ {context}
649
+
650
+ User Question: {query}
651
+
652
+ Medical Response:"""
653
+
654
+ # Tokenize input
655
+ inputs = self.tokenizer.encode(prompt, return_tensors="pt")
656
+
657
+ # Generate response
658
+ with torch.no_grad():
659
+ outputs = self.llm.generate(
660
+ inputs,
661
+ max_new_tokens=512,
662
+ temperature=0.3, # Low temperature for medical accuracy
663
+ do_sample=True,
664
+ pad_token_id=self.tokenizer.eos_token_id,
665
+ repetition_penalty=1.1
666
+ )
667
+
668
+ # Decode response
669
+ response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
670
+
671
+ # Extract generated part
672
+ response = response[len(prompt):].strip()
673
+
674
+ return response
675
+
676
+ except Exception as e:
677
+ logger.error(f"Error in LLM generation: {e}")
678
+ return self._generate_fallback_response(query, context)
679
+
680
+ def _generate_fallback_response(self, query: str, context: str) -> str:
681
+ """Generate fallback response when LLM is unavailable."""
682
+ return f"""I apologize, but I'm currently unable to process your medical query due to technical limitations.
683
+
684
+ For the question: "{query}"
685
+
686
+ Please consult the following medical resources:
687
+ - WHO Emergency Care Guidelines
688
+ - ICRC First Aid Manual
689
+ - Local medical professionals
690
+
691
+ In any medical emergency, seek immediate professional medical attention.
692
 
693
+ Available context from knowledge base:
694
+ {context[:500]}..."""
695
+
696
+ def _prepare_final_response(
697
+ self,
698
+ query: str,
699
+ response: str,
700
+ search_results: List[Dict[str, Any]],
701
+ safety_check: Dict[str, Any]
702
+ ) -> Dict[str, Any]:
703
+ """Prepare final response with safety information."""
704
+
705
+ # Add safety warnings if needed
706
+ if not safety_check["is_safe"]:
707
+ response = f"⚠️ SAFETY WARNING: This response requires verification.\n\n{response}"
708
+
709
+ if safety_check["confidence_score"] < 0.7:
710
+ response += f"\n\n⚠️ Confidence Level: {safety_check['confidence_score']:.1%} - Please verify with medical professional."
711
+
712
+ # Add source attribution
713
+ if search_results:
714
+ sources = list(set([result["source"] for result in search_results]))
715
+ response += f"\n\nSources: {', '.join(sources)}"
716
+
717
+ # Add disclaimer
718
+ response += "\n\n⚠️ MEDICAL DISCLAIMER: This is AI-generated guidance for educational purposes. Always consult qualified medical professionals for diagnosis and treatment decisions."
719
+
720
+ return {
721
+ "response": response,
722
+ "confidence_score": safety_check["confidence_score"],
723
+ "safety_issues": safety_check["issues"],
724
+ "safety_warnings": safety_check["warnings"],
725
+ "sources": [result["source"] for result in search_results],
726
+ "timestamp": datetime.now().isoformat()
727
+ }
728
+
729
+ def _create_error_response(self, error_message: str) -> Dict[str, Any]:
730
+ """Create error response."""
731
+ return {
732
+ "response": f"I apologize, but I encountered an error processing your request: {error_message}\n\nPlease try rephrasing your question or consult medical professionals directly.",
733
+ "confidence_score": 0.0,
734
+ "safety_issues": ["System error occurred"],
735
+ "safety_warnings": ["Unable to verify medical accuracy due to system error"],
736
+ "sources": [],
737
+ "timestamp": datetime.now().isoformat()
738
+ }
739
+
740
+ # Global RAG system instance
741
+ rag_system = None
742
+
743
+ def initialize_system():
744
+ """Initialize the RAG system."""
745
+ global rag_system
746
+
747
+ if rag_system is None:
748
+ rag_system = GazaRAGSystem()
749
+ rag_system.initialize()
750
+
751
+ return rag_system
752
+
753
+ def process_medical_query(query: str) -> str:
754
+ """Process medical query and return response."""
755
+ if not query.strip():
756
+ return "Please enter a medical question."
757
+
758
+ try:
759
+ # Initialize system if needed
760
+ system = initialize_system()
761
+
762
+ # Generate response
763
+ result = system.generate_response(query)
764
+
765
+ return result["response"]
766
+
767
+ except Exception as e:
768
+ logger.error(f"Error processing query: {e}")
769
+ return f"I apologize, but I encountered an error: {str(e)}\n\nPlease try again or consult medical professionals directly."
770
+
771
+ def create_gradio_interface():
772
+ """Create Gradio interface for the application."""
773
+
774
+ # Custom CSS for medical theme
775
+ css = """
776
+ .medical-header {
777
+ background: linear-gradient(90deg, #2c5aa0 0%, #1e3a8a 100%);
778
+ color: white;
779
+ padding: 20px;
780
+ border-radius: 10px;
781
+ margin-bottom: 20px;
782
+ text-align: center;
783
+ }
784
+
785
+ .warning-box {
786
+ background-color: #fef3cd;
787
+ border: 1px solid #ffeaa7;
788
+ border-radius: 5px;
789
+ padding: 15px;
790
+ margin: 10px 0;
791
+ }
792
+
793
+ .emergency-notice {
794
+ background-color: #f8d7da;
795
+ border: 1px solid #f5c6cb;
796
+ border-radius: 5px;
797
+ padding: 15px;
798
+ margin: 10px 0;
799
+ font-weight: bold;
800
+ }
801
+ """
802
+
803
+ with gr.Blocks(css=css, title="Gaza First Aid Assistant") as interface:
804
+
805
+ # Header
806
+ gr.HTML("""
807
+ <div class="medical-header">
808
+ <h1>🏥 Gaza First Aid Assistant</h1>
809
+ <p>Specialized Medical Guidance for Healthcare Workers in Gaza</p>
810
+ <p><em>Enhanced with Offline Capabilities and Safety Validation</em></p>
811
+ </div>
812
+ """)
813
+
814
+ # Emergency notice
815
+ gr.HTML("""
816
+ <div class="emergency-notice">
817
+ 🚨 EMERGENCY NOTICE: For life-threatening emergencies, seek immediate professional medical attention.
818
+ This AI assistant provides guidance to support, not replace, medical professionals.
819
+ </div>
820
+ """)
821
+
822
+ # Main interface
823
+ with gr.Row():
824
+ with gr.Column(scale=2):
825
+ query_input = gr.Textbox(
826
+ label="Medical Question",
827
+ placeholder="Enter your first aid or medical question here...",
828
+ lines=3
829
+ )
830
+
831
+ submit_btn = gr.Button("Get Medical Guidance", variant="primary")
832
+
833
+ # Example queries
834
+ gr.Examples(
835
+ examples=[
836
+ "My patient is feeling dizzy, what do i do",
837
+ "How to treat a gun wound",
838
+ "How do i treat patients with stab wounds",
839
+ "How to treat injuries from shrapnel",
840
+ "How to treat a burn when clean water is limited?",
841
+ "What are the signs of infection in a wound?",
842
+ "How to stop severe bleeding with improvised materials?",
843
+ "What to do for someone with difficulty breathing?",
844
+ "How to treat dehydration in children?"
845
+ ],
846
+ inputs=query_input
847
+ )
848
+
849
+ with gr.Column(scale=3):
850
+ response_output = gr.Textbox(
851
+ label="Medical Guidance",
852
+ lines=15,
853
+ max_lines=20
854
+ )
855
+
856
+ # Warning and disclaimer
857
+ gr.HTML("""
858
+ <div class="warning-box">
859
+ <h3>⚠️ Important Medical Disclaimer</h3>
860
+ <ul>
861
+ <li>This AI assistant provides educational guidance based on established medical protocols</li>
862
+ <li>Always verify information with qualified medical professionals</li>
863
+ <li>In emergencies, prioritize immediate professional medical care</li>
864
+ <li>Consider local resource constraints and adapt guidance accordingly</li>
865
+ <li>This tool is designed to support, not replace, medical training and judgment</li>
866
+ </ul>
867
+ </div>
868
+ """)
869
+
870
+ # Information about the system
871
+ with gr.Accordion("About This System", open=False):
872
+ gr.Markdown("""
873
+ ### Gaza First Aid Assistant - Enhanced Version
874
+
875
+ This specialized medical AI assistant is designed specifically for healthcare workers in Gaza,
876
+ incorporating:
877
+
878
+ - **Offline-First Architecture**: Reduced dependency on external services
879
+ - **Gaza-Specific Medical Knowledge**: WHO, ICRC, and MSF guidelines adapted for local conditions
880
+ - **Comprehensive Safety Validation**: Multiple layers of medical fact-checking
881
+ - **Resource-Aware Guidance**: Considers limited supplies and infrastructure
882
+ - **Conflict-Adapted Protocols**: Medical advice tailored for conflict environments
883
+
884
+ **Knowledge Sources:**
885
+ - World Health Organization (WHO) Burn Prevention and Care Guidelines
886
+ - International Committee of the Red Cross (ICRC) War Surgery Manuals
887
+ - Médecins Sans Frontières (MSF) Field Guides
888
+ - Palestine Red Crescent Society (PRCS) Field Experience
889
+ - Standard First Aid and Emergency Medical Protocols
890
+
891
+ **Version**: 2.0 | **Last Updated**: July 2025
892
+ """)
893
+
894
+ # Event handlers
895
+ submit_btn.click(
896
+ fn=process_medical_query,
897
+ inputs=query_input,
898
+ outputs=response_output
899
+ )
900
+
901
+ query_input.submit(
902
+ fn=process_medical_query,
903
+ inputs=query_input,
904
+ outputs=response_output
905
+ )
906
+
907
+ return interface
908
+
909
+ def main():
910
+ """Main application entry point."""
911
+ logger.info("Starting Gaza First Aid Assistant...")
912
+
913
+ try:
914
+ # Create and launch interface
915
+ interface = create_gradio_interface()
916
+
917
+ # Launch with appropriate settings
918
+ interface.launch(
919
+ server_name="0.0.0.0",
920
+ server_port=7860,
921
+ share=False, # Set to True for public sharing
922
+ debug=False
923
+ )
924
+
925
+ except Exception as e:
926
+ logger.error(f"Error launching application: {e}")
927
+ sys.exit(1)
928
 
929
  if __name__ == "__main__":
930
+ main()