"""Gaza First Aid Assistant.

Retrieval-augmented first-aid guidance application: a FAISS-backed knowledge
base built from local PDF/text documents, a Hugging Face causal language
model for answer generation, a rule-based safety checker applied to every
generated answer, and a Gradio web interface.
"""

import os
import sys
import json
import logging
import warnings
from pathlib import Path
from typing import List, Dict, Any, Optional, Tuple
import hashlib
import pickle
from datetime import datetime

# Suppress warnings for cleaner output
warnings.filterwarnings("ignore")

# Core dependencies
import gradio as gr
import numpy as np
import pandas as pd
from sentence_transformers import SentenceTransformer
import faiss
import torch
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    pipeline,
    BitsAndBytesConfig
)

# Document processing
from llama_index.core import (
    Document,
    VectorStoreIndex,
    ServiceContext,
    StorageContext,
    load_index_from_storage
)
from llama_index.core.node_parser import SentenceSplitter
from llama_index.vector_stores.faiss import FaissVectorStore
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.huggingface import HuggingFaceLLM

# PDF processing
import PyPDF2
from io import BytesIO

# Medical knowledge validation
import re
from difflib import SequenceMatcher

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)


class MedicalFactChecker:
    """
    Medical fact checking and hallucination detection system.

    Applies rule-based checks to a generated response: contraindicated
    practices, word overlap with the retrieved context, definitive claims,
    and the presence of dosage information.
    """

    # Cues that turn a mention of a dangerous practice into a warning against
    # it rather than a recommendation.
    _NEGATION_CUES = (
        "do not", "don't", "never", "avoid", "should not", "shouldn't"
    )

    # Characters treated as sentence boundaries when looking for a negation.
    _SENTENCE_BREAKS = ".!?\n"

    def __init__(self):
        self.medical_facts = self._load_medical_facts()
        self.contraindications = self._load_contraindications()
        self.dosage_patterns = self._compile_dosage_patterns()

    def _load_medical_facts(self) -> Dict[str, Any]:
        """Return the built-in table of first-aid facts, keyed by topic."""
        return {
            "burn_treatment": {
                "immediate_care": [
                    "Remove from heat source immediately",
                    "Cool with clean water for 10-20 minutes",
                    "Remove jewelry and loose clothing before swelling",
                    "Cover with clean, dry cloth",
                    "Do not apply ice, butter, or oils"
                ],
                "severity_assessment": {
                    "first_degree": "Affects only outer layer of skin, red and painful",
                    "second_degree": "Affects outer and underlying layer, blisters form",
                    "third_degree": "Affects all layers, may appear white or charred"
                }
            },
            "wound_care": {
                "cleaning": [
                    "Clean hands before treating wounds",
                    "Rinse wound with clean water",
                    "Apply gentle pressure to stop bleeding",
                    "Cover with sterile bandage"
                ],
                "infection_signs": [
                    "Increased pain, redness, swelling",
                    "Warmth around wound",
                    "Pus or unusual discharge",
                    "Red streaking from wound",
                    "Fever"
                ]
            },
            "emergency_priorities": {
                "abc_assessment": [
                    "Airway - ensure clear and open",
                    "Breathing - check for normal breathing",
                    "Circulation - check pulse and control bleeding"
                ]
            }
        }

    def _load_contraindications(self) -> Dict[str, List[str]]:
        """Return the built-in list of dangerous practices, keyed by category.

        Every entry is phrased "Do not <practice>"; ``_check_contraindications``
        relies on that shape to extract the practice itself.
        """
        return {
            "burns": [
                "Do not apply ice directly to burns",
                "Do not use butter, oils, or home remedies",
                "Do not break blisters",
                "Do not remove clothing stuck to burn"
            ],
            "wounds": [
                "Do not remove embedded objects",
                "Do not use hydrogen peroxide on deep wounds",
                "Do not ignore signs of infection"
            ],
            "general": [
                "Do not move suspected spinal injury patients unnecessarily",
                "Do not give food or water to unconscious patients",
                "Do not leave patients unattended if condition is serious"
            ]
        }

    def _compile_dosage_patterns(self) -> List[re.Pattern]:
        """Compile case-insensitive regex patterns for detecting medication dosages."""
        patterns = [
            r'\d+\s*mg\b',                        # milligrams
            r'\d+\s*g\b',                         # grams
            r'\d+\s*ml\b',                        # milliliters
            r'\d+\s*tablets?\b',                  # tablets
            r'\d+\s*times?\s+(?:per\s+)?day\b',   # frequency
            r'every\s+\d+\s+hours?\b'             # intervals
        ]
        return [re.compile(pattern, re.IGNORECASE) for pattern in patterns]

    def check_medical_accuracy(self, response: str, context: str) -> Dict[str, Any]:
        """
        Check medical accuracy of generated response against context and facts.

        The confidence score starts from a base of 0.8 and is reduced by a
        fixed penalty for each category of finding, then clamped to [0, 1].

        Args:
            response: Generated response text
            context: Retrieved context from knowledge base

        Returns:
            Dictionary with keys ``confidence_score``, ``issues``,
            ``warnings``, ``context_similarity`` and ``is_safe``. ``is_safe``
            is true only when there are no issues and the confidence score
            is above 0.6.
        """
        accuracy_score = 0.0
        issues = []
        # Not named ``warnings``: that would shadow the imported module.
        warning_messages = []

        # Check for contraindications
        contraindication_issues = self._check_contraindications(response)
        if contraindication_issues:
            issues.extend(contraindication_issues)
            accuracy_score -= 0.3

        # Check context alignment
        context_similarity = self._calculate_context_similarity(response, context)
        if context_similarity < 0.7:
            warning_messages.append(f"Low context similarity: {context_similarity:.2f}")
            accuracy_score -= 0.2

        # Check for unsupported medical claims
        unsupported_claims = self._detect_unsupported_claims(response, context)
        if unsupported_claims:
            issues.extend(unsupported_claims)
            accuracy_score -= 0.4

        # Check dosage information if present
        dosage_issues = self._validate_dosages(response)
        if dosage_issues:
            warning_messages.extend(dosage_issues)
            accuracy_score -= 0.1

        # Calculate final confidence score
        confidence_score = max(0.0, min(1.0, 0.8 + accuracy_score))

        return {
            "confidence_score": confidence_score,
            "issues": issues,
            "warnings": warning_messages,
            "context_similarity": context_similarity,
            "is_safe": len(issues) == 0 and confidence_score > 0.6
        }

    def _check_contraindications(self, response: str) -> List[str]:
        """Check for dangerous medical advice in response.

        For every "Do not <practice>" rule, the practice text is searched for
        in the response. A mention counts as an issue unless a negation cue
        precedes it within the same sentence, so a warning elsewhere in the
        response cannot mask a dangerous recommendation.

        Args:
            response: Generated response text.

        Returns:
            One message per contraindication that is mentioned un-negated.
        """
        issues = []
        response_lower = response.lower()

        for contraindications in self.contraindications.values():
            for contraindication in contraindications:
                keywords = contraindication.lower().split()
                if len(keywords) <= 2:
                    continue

                # Drop the leading "do not" to get the practice itself.
                key_phrase = " ".join(keywords[2:])
                if self._mentioned_without_negation(response_lower, key_phrase):
                    issues.append(f"Potential contraindication detected: {contraindication}")

        return issues

    def _mentioned_without_negation(self, text_lower: str, phrase: str) -> bool:
        """Return True if any occurrence of ``phrase`` lacks a preceding negation.

        Only the part of the sentence before the occurrence is inspected;
        sentence starts are located with ``_SENTENCE_BREAKS``.
        """
        position = text_lower.find(phrase)
        while position != -1:
            # rfind gives -1 when no break precedes the phrase, so +1 yields 0.
            sentence_start = max(
                text_lower.rfind(mark, 0, position) for mark in self._SENTENCE_BREAKS
            ) + 1
            lead_in = text_lower[sentence_start:position]
            if not any(cue in lead_in for cue in self._NEGATION_CUES):
                return True
            position = text_lower.find(phrase, position + 1)
        return False

    def _calculate_context_similarity(self, response: str, context: str) -> float:
        """Return the Jaccard overlap of the word sets of response and context.

        Words are lowercased and split on whitespace. Returns 0.0 when either
        text is empty or contains no words.
        """
        if not context or not response:
            return 0.0

        response_words = set(response.lower().split())
        context_words = set(context.lower().split())

        if not response_words or not context_words:
            return 0.0

        shared = response_words & context_words
        combined = response_words | context_words
        return len(shared) / len(combined)

    def _detect_unsupported_claims(self, response: str, context: str) -> List[str]:
        """Detect definitive medical claims not supported by context.

        Returns:
            One message per definitive-claim pattern that matches the
            response and is not considered supported by the context.
        """
        issues = []

        # Look for definitive medical statements
        definitive_patterns = [
            r'always\s+(?:use|take|apply)',
            r'never\s+(?:use|take|apply)',
            r'will\s+(?:cure|heal|fix)',
            r'guaranteed\s+to',
            r'completely\s+(?:safe|effective)'
        ]

        for pattern in definitive_patterns:
            if not re.search(pattern, response, re.IGNORECASE):
                continue
            if not self._claim_supported_by_context(pattern, context):
                issues.append(f"Unsupported definitive claim detected: {pattern}")

        return issues

    def _claim_supported_by_context(self, claim_pattern: str, context: str) -> bool:
        """Check if a claim is supported by the context.

        Simplified placeholder: ``claim_pattern`` is not inspected; any
        context longer than 100 characters is treated as supporting the claim.
        """
        return len(context) > 100

    def _validate_dosages(self, response: str) -> List[str]:
        """Return a single advisory message if the response mentions a dosage."""
        if any(pattern.search(response) for pattern in self.dosage_patterns):
            return ["Dosage information detected - verify with medical professional"]
        return []


class GazaKnowledgeBase:
    """
    Specialized knowledge base for Gaza medical information.
    Handles document processing, indexing, and retrieval.
    """

    # Output dimension of all-MiniLM-L6-v2, used when an empty index is built.
    _EMBEDDING_DIMENSION = 384

    def __init__(self, data_dir: str = "./data"):
        self.data_dir = Path(data_dir)
        self.embedding_model = None
        self.vector_store = None
        self.index = None
        self.documents = []
        # One entry per indexed chunk, in the same order as the FAISS rows.
        # Initialised here so saving and searching an empty store cannot
        # fail with AttributeError.
        self.chunk_metadata: List[Dict[str, Any]] = []

        # Gaza-specific medical priorities
        self.medical_priorities = {
            "trauma": ["gunshot wounds", "blast injuries", "burns", "fractures"],
            "infectious": ["cholera", "dysentery", "respiratory infections"],
            "chronic": ["diabetes", "hypertension", "malnutrition"],
            "emergency": ["cardiac arrest", "severe bleeding", "airway obstruction"]
        }

    def initialize(self):
        """Initialize the knowledge base with embeddings and vector store."""
        logger.info("Initializing Gaza Knowledge Base...")

        # Initialize embedding model
        self.embedding_model = SentenceTransformer(
            'sentence-transformers/all-MiniLM-L6-v2',
            device='cpu'  # Use CPU for better compatibility
        )

        # Load or create vector store
        self._load_or_create_vector_store()

        logger.info("Knowledge base initialization complete.")

    def _load_or_create_vector_store(self):
        """Load existing vector store or create new one.

        The store is loaded only when both of its files are present; a
        directory left behind by an interrupted save is rebuilt instead.
        """
        vector_store_path = self.data_dir / "vector_store"
        index_file = vector_store_path / "index.faiss"
        metadata_file = vector_store_path / "metadata.pkl"

        if index_file.exists() and metadata_file.exists():
            logger.info("Loading existing vector store...")
            self._load_vector_store(vector_store_path)
        else:
            logger.info("Creating new vector store...")
            self._create_vector_store()
            self._save_vector_store(vector_store_path)

    def _create_vector_store(self):
        """Create vector store from documents.

        Sets ``self.vector_store`` to a FAISS L2 index and
        ``self.chunk_metadata`` to the matching chunk records. When there is
        nothing to index, both are left empty.
        """
        # Load documents
        self.documents = self._load_documents()

        if not self.documents:
            logger.warning("No documents found. Creating empty vector store.")
            self._create_empty_vector_store()
            return

        # Process documents into chunks
        chunks = self._process_documents(self.documents)
        if not chunks:
            # Documents with no extractable text would otherwise produce an
            # embedding array with no second dimension.
            logger.warning("Documents produced no text chunks. Creating empty vector store.")
            self._create_empty_vector_store()
            return

        # Create embeddings
        embeddings = self._create_embeddings(chunks)

        # Create FAISS index
        dimension = embeddings.shape[1]
        self.vector_store = faiss.IndexFlatL2(dimension)
        self.vector_store.add(embeddings.astype('float32'))

        # Store chunk metadata
        self.chunk_metadata = chunks

        logger.info(f"Created vector store with {len(chunks)} chunks")

    def _create_empty_vector_store(self):
        """Create an empty FAISS index with no chunk metadata."""
        self.vector_store = faiss.IndexFlatL2(self._EMBEDDING_DIMENSION)
        self.chunk_metadata = []

    def _load_documents(self) -> List[Document]:
        """Load medical documents (``*.pdf`` and ``*.txt``) from data directory.

        Files that fail to load are logged and skipped.
        """
        documents = []

        if not self.data_dir.exists():
            logger.warning(f"Data directory {self.data_dir} does not exist")
            return documents

        # Load PDF files
        for pdf_file in self.data_dir.glob("*.pdf"):
            try:
                doc_text = self._extract_pdf_text(pdf_file)
                if doc_text:
                    documents.append(Document(
                        text=doc_text,
                        metadata={"source": str(pdf_file), "type": "pdf"}
                    ))
                    logger.info(f"Loaded document: {pdf_file.name}")
            except Exception as e:
                logger.error(f"Error loading {pdf_file}: {e}")

        # Load text files
        for txt_file in self.data_dir.glob("*.txt"):
            try:
                with open(txt_file, 'r', encoding='utf-8') as f:
                    doc_text = f.read()
                documents.append(Document(
                    text=doc_text,
                    metadata={"source": str(txt_file), "type": "text"}
                ))
                logger.info(f"Loaded document: {txt_file.name}")
            except Exception as e:
                logger.error(f"Error loading {txt_file}: {e}")

        return documents

    def _extract_pdf_text(self, pdf_path: Path) -> str:
        """Extract text from PDF file.

        Returns:
            The text of all pages, each followed by a newline, or an empty
            string if the file cannot be read.
        """
        try:
            with open(pdf_path, 'rb') as file:
                pdf_reader = PyPDF2.PdfReader(file)
                # extract_text() may yield None for pages without a text
                # layer; treat those as empty instead of failing the file.
                return "".join(
                    (page.extract_text() or "") + "\n"
                    for page in pdf_reader.pages
                )
        except Exception as e:
            logger.error(f"Error extracting text from {pdf_path}: {e}")
            return ""

    def _process_documents(self, documents: List[Document]) -> List[Dict[str, Any]]:
        """Process documents into chunks with metadata.

        Returns:
            One dict per chunk with keys ``text`` (chunk plus context notes,
            used for embedding), ``original_text``, ``source``, ``chunk_id``
            and ``medical_priority``.
        """
        chunks = []

        # Initialize sentence splitter
        splitter = SentenceSplitter(
            chunk_size=512,
            chunk_overlap=50
        )

        for doc in documents:
            source = doc.metadata.get("source", "unknown")

            # Split document into chunks
            for i, chunk_text in enumerate(splitter.split_text(doc.text)):
                chunks.append({
                    # Enhance chunk with Gaza-specific medical context
                    "text": self._enhance_medical_context(chunk_text),
                    "original_text": chunk_text,
                    "source": source,
                    "chunk_id": f"{source}_{i}",
                    "medical_priority": self._assess_medical_priority(chunk_text)
                })

        return chunks

    def _enhance_medical_context(self, text: str) -> str:
        """Enhance text with Gaza-specific medical context.

        Appends a bracketed note for each keyword group found in ``text``.
        """
        # Lowercase once, before any note is appended, so the notes
        # themselves can never trigger a further note.
        text_lower = text.lower()

        # Add context about resource constraints
        if any(term in text_lower for term in ["treatment", "medication", "supplies"]):
            text += "\n[Gaza Context: Consider resource limitations and alternative treatments when standard supplies are unavailable.]"

        # Add urgency context for trauma
        if any(term in text_lower for term in ["bleeding", "wound", "trauma", "injury"]):
            text += "\n[Gaza Context: In conflict situations, prioritize immediate life-saving interventions.]"

        return text

    def _assess_medical_priority(self, text: str) -> str:
        """Assess medical priority level of text content.

        Returns:
            The first key of ``self.medical_priorities`` with a keyword that
            occurs in ``text`` (case-insensitive), or ``"general"``.
        """
        text_lower = text.lower()

        for priority, keywords in self.medical_priorities.items():
            if any(keyword in text_lower for keyword in keywords):
                return priority

        return "general"

    def _create_embeddings(self, chunks: List[Dict[str, Any]]) -> np.ndarray:
        """Create embeddings for the ``text`` field of each chunk."""
        texts = [chunk["text"] for chunk in chunks]
        return self.embedding_model.encode(texts, show_progress_bar=True)

    def _save_vector_store(self, path: Path):
        """Save vector store and metadata to disk.

        Writes ``index.faiss`` and ``metadata.pkl`` under ``path``, creating
        the directory if needed.
        """
        path.mkdir(parents=True, exist_ok=True)

        # Save FAISS index
        faiss.write_index(self.vector_store, str(path / "index.faiss"))

        # Save metadata
        with open(path / "metadata.pkl", 'wb') as f:
            pickle.dump(self.chunk_metadata, f)

        logger.info(f"Vector store saved to {path}")

    def _load_vector_store(self, path: Path):
        """Load vector store and metadata from disk."""
        # Load FAISS index
        self.vector_store = faiss.read_index(str(path / "index.faiss"))

        # Load metadata
        # SECURITY: pickle.load executes code embedded in the file. Only load
        # stores written by this application; never point data_dir at
        # untrusted content.
        with open(path / "metadata.pkl", 'rb') as f:
            self.chunk_metadata = pickle.load(f)

        logger.info(f"Vector store loaded from {path}")

    def search(self, query: str, k: int = 5) -> List[Dict[str, Any]]:
        """Search for relevant medical information.

        Args:
            query: Free-text query to embed and look up.
            k: Maximum number of chunks to return.

        Returns:
            Up to ``k`` dicts with keys ``text``, ``source``, ``score`` and
            ``medical_priority``. ``score`` is the value returned by the
            FAISS L2 index. Empty when the knowledge base is not initialised,
            holds no chunks, or ``k`` is not positive.
        """
        if self.vector_store is None or self.embedding_model is None:
            return []
        if k <= 0 or not self.chunk_metadata or self.vector_store.ntotal == 0:
            return []

        # Create query embedding
        query_embedding = self.embedding_model.encode([query])

        # Search vector store
        scores, indices = self.vector_store.search(
            query_embedding.astype('float32'), k
        )

        # Prepare results
        results = []
        for score, idx in zip(scores[0], indices[0]):
            # FAISS pads missing hits with -1, which would otherwise index
            # the last chunk.
            if not 0 <= idx < len(self.chunk_metadata):
                continue
            chunk = self.chunk_metadata[idx]
            results.append({
                "text": chunk["original_text"],
                "source": chunk["source"],
                "score": float(score),
                "medical_priority": chunk["medical_priority"]
            })

        return results


class GazaRAGSystem:
    """
    Main RAG system for Gaza First Aid Assistant.
    Integrates knowledge base, language model, and safety checks.
    """

    def __init__(self):
        self.knowledge_base = GazaKnowledgeBase()
        self.fact_checker = MedicalFactChecker()
        self.llm = None
        self.tokenizer = None

        # System prompts
        self.system_prompt = self._create_system_prompt()

    def initialize(self):
        """Initialize the RAG system."""
        logger.info("Initializing Gaza RAG System...")

        # Initialize knowledge base
        self.knowledge_base.initialize()

        # Initialize language model
        self._initialize_llm()

        logger.info("RAG system initialization complete.")

    def _initialize_llm(self):
        """Initialize the language model with optimization for medical use.

        Loads the primary model with 4-bit quantization; on any failure the
        fallback model is tried instead.
        """
        model_name = "microsoft/Phi-3-mini-4k-instruct"

        try:
            # Configure quantization for efficiency
            quantization_config = BitsAndBytesConfig(
                load_in_4bit=True,
                bnb_4bit_compute_dtype=torch.float16,
                bnb_4bit_use_double_quant=True,
                bnb_4bit_quant_type="nf4"
            )

            # Load tokenizer
            self.tokenizer = AutoTokenizer.from_pretrained(
                model_name,
                trust_remote_code=True
            )

            # Load model
            self.llm = AutoModelForCausalLM.from_pretrained(
                model_name,
                quantization_config=quantization_config,
                device_map="auto",
                trust_remote_code=True,
                torch_dtype=torch.float16
            )

            logger.info(f"Loaded model: {model_name}")

        except Exception as e:
            logger.error(f"Error loading model: {e}")
            # Fallback to a simpler model or CPU-only mode
            self._initialize_fallback_llm()

    def _initialize_fallback_llm(self):
        """Initialize fallback LLM for cases where main model fails.

        On failure both ``self.llm`` and ``self.tokenizer`` are set to None,
        which makes ``_generate_llm_response`` return the canned fallback text.
        """
        try:
            # Use a smaller, more compatible model
            model_name = "microsoft/DialoGPT-medium"

            self.tokenizer = AutoTokenizer.from_pretrained(model_name)
            self.llm = AutoModelForCausalLM.from_pretrained(
                model_name,
                torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32
            )

            logger.info(f"Loaded fallback model: {model_name}")

        except Exception as e:
            logger.error(f"Error loading fallback model: {e}")
            self.llm = None
            self.tokenizer = None

    def _create_system_prompt(self) -> str:
        """Create system prompt for medical AI assistant."""
        return """You are a specialized medical AI assistant designed to provide first aid guidance for healthcare workers in Gaza.

Your responses must be:
1. MEDICALLY ACCURATE: Base all advice on established medical protocols from WHO, ICRC, and MSF guidelines.
2. RESOURCE-AWARE: Consider the limited medical supplies and infrastructure in Gaza. Suggest alternatives when standard treatments are unavailable.
3. SAFETY-FIRST: Always prioritize patient safety. If uncertain, recommend seeking professional medical attention.
4. CLEAR AND ACTIONABLE: Provide step-by-step instructions that can be followed by healthcare workers under pressure.
5. CONTEXT-APPROPRIATE: Consider the conflict environment and adapt advice accordingly.

IMPORTANT SAFETY GUIDELINES:
- Never provide definitive diagnoses
- Always recommend professional medical evaluation for serious conditions
- Clearly state when immediate emergency care is needed
- Acknowledge limitations of remote medical advice
- Provide source attribution when possible

Remember: You are providing guidance to support medical professionals, not replace them."""

    def generate_response(self, query: str) -> Dict[str, Any]:
        """Generate response to medical query with safety checks.

        Args:
            query: The user's medical question.

        Returns:
            Dict with keys ``response``, ``confidence_score``,
            ``safety_issues``, ``safety_warnings``, ``sources`` and
            ``timestamp``. Any exception is converted into an error response
            with the same keys.
        """
        try:
            # Search knowledge base
            search_results = self.knowledge_base.search(query, k=3)

            # Prepare context
            context = self._prepare_context(search_results)

            # Generate response
            response = self._generate_llm_response(query, context)

            # Perform safety checks
            safety_check = self.fact_checker.check_medical_accuracy(response, context)

            # Prepare final response
            return self._prepare_final_response(
                query, response, search_results, safety_check
            )

        except Exception as e:
            logger.error(f"Error generating response: {e}")
            return self._create_error_response(str(e))

    def _prepare_context(self, search_results: List[Dict[str, Any]]) -> str:
        """Prepare context from search results.

        Returns:
            A newline-joined listing of each result's source and content, or
            a fixed notice when there are no results.
        """
        if not search_results:
            return "No specific medical information found in knowledge base."

        context_parts = []
        for result in search_results:
            context_parts.append(f"Source: {result['source']}")
            context_parts.append(f"Content: {result['text']}")
            context_parts.append("---")

        return "\n".join(context_parts)

    def _generate_llm_response(self, query: str, context: str) -> str:
        """Generate response using language model.

        Falls back to ``_generate_fallback_response`` when no model is loaded
        or generation raises.
        """
        if self.llm is None or self.tokenizer is None:
            return self._generate_fallback_response(query, context)

        try:
            # Prepare prompt
            prompt = f"""{self.system_prompt}

Context from medical knowledge base:
{context}

User Question: {query}

Medical Response:"""

            # Tokenize input and place it on the model's device; generation
            # fails or falls back to slow transfers when they differ.
            inputs = self.tokenizer.encode(prompt, return_tensors="pt")
            inputs = inputs.to(self.llm.device)

            # Generate response
            with torch.no_grad():
                outputs = self.llm.generate(
                    inputs,
                    max_new_tokens=512,
                    temperature=0.3,  # Low temperature for medical accuracy
                    do_sample=True,
                    pad_token_id=self.tokenizer.eos_token_id,
                    repetition_penalty=1.1
                )

            # Decode only the newly generated tokens. Slicing the decoded
            # text by len(prompt) is unreliable because decoding does not
            # always reproduce the prompt character for character.
            generated_tokens = outputs[0][inputs.shape[-1]:]
            response = self.tokenizer.decode(generated_tokens, skip_special_tokens=True)

            return response.strip()

        except Exception as e:
            logger.error(f"Error in LLM generation: {e}")
            return self._generate_fallback_response(query, context)

    def _generate_fallback_response(self, query: str, context: str) -> str:
        """Generate fallback response when LLM is unavailable.

        The message echoes the query and the first 500 characters of context.
        """
        return f"""I apologize, but I'm currently unable to process your medical query due to technical limitations.

For the question: "{query}"

Please consult the following medical resources:
- WHO Emergency Care Guidelines
- ICRC First Aid Manual
- Local medical professionals

In any medical emergency, seek immediate professional medical attention.

Available context from knowledge base:
{context[:500]}..."""

    def _prepare_final_response(
        self,
        query: str,
        response: str,
        search_results: List[Dict[str, Any]],
        safety_check: Dict[str, Any]
    ) -> Dict[str, Any]:
        """Prepare final response with safety information.

        Adds a safety banner, a confidence note, source attribution and a
        disclaimer to ``response`` as applicable.

        Args:
            query: The original question (currently unused).
            response: Raw generated answer.
            search_results: Retrieved chunks used as context.
            safety_check: Result of ``MedicalFactChecker.check_medical_accuracy``.

        Returns:
            Dict with keys ``response``, ``confidence_score``,
            ``safety_issues``, ``safety_warnings``, ``sources`` and
            ``timestamp``.
        """
        # Add safety warnings if needed
        if not safety_check["is_safe"]:
            response = f"⚠️ SAFETY WARNING: This response requires verification.\n\n{response}"

        if safety_check["confidence_score"] < 0.7:
            response += f"\n\n⚠️ Confidence Level: {safety_check['confidence_score']:.1%} - Please verify with medical professional."

        # Add source attribution
        if search_results:
            # dict.fromkeys de-duplicates while keeping retrieval order, so
            # the attribution line is stable between runs.
            sources = list(dict.fromkeys(result["source"] for result in search_results))
            response += f"\n\nSources: {', '.join(sources)}"

        # Add disclaimer
        response += "\n\n⚠️ MEDICAL DISCLAIMER: This is AI-generated guidance for educational purposes. Always consult qualified medical professionals for diagnosis and treatment decisions."

        return {
            "response": response,
            "confidence_score": safety_check["confidence_score"],
            "safety_issues": safety_check["issues"],
            "safety_warnings": safety_check["warnings"],
            "sources": [result["source"] for result in search_results],
            "timestamp": datetime.now().isoformat()
        }

    def _create_error_response(self, error_message: str) -> Dict[str, Any]:
        """Create error response with the same keys as a normal response."""
        return {
            "response": f"I apologize, but I encountered an error processing your request: {error_message}\n\nPlease try rephrasing your question or consult medical professionals directly.",
            "confidence_score": 0.0,
            "safety_issues": ["System error occurred"],
            "safety_warnings": ["Unable to verify medical accuracy due to system error"],
            "sources": [],
            "timestamp": datetime.now().isoformat()
        }


# Global RAG system instance
rag_system = None


def initialize_system():
    """Initialize the RAG system on first use and return the shared instance.

    The global is assigned only after ``initialize()`` succeeds, so a failed
    start-up is retried on the next call instead of leaving a
    half-initialised instance in place.
    """
    global rag_system
    if rag_system is None:
        system = GazaRAGSystem()
        system.initialize()
        rag_system = system
    return rag_system


def process_medical_query(query: str) -> str:
    """Process medical query and return response.

    Args:
        query: Question text from the UI; may be empty or None.

    Returns:
        The response text, a prompt to enter a question when the query is
        blank, or an apology message if processing raises.
    """
    if not query or not query.strip():
        return "Please enter a medical question."

    try:
        # Initialize system if needed
        system = initialize_system()

        # Generate response
        result = system.generate_response(query)

        return result["response"]

    except Exception as e:
        logger.error(f"Error processing query: {e}")
        return f"I apologize, but I encountered an error: {str(e)}\n\nPlease try again or consult medical professionals directly."


def create_gradio_interface():
    """Create Gradio interface for the application.

    Returns:
        The ``gr.Blocks`` interface, with the submit button and the textbox
        Enter key both wired to ``process_medical_query``.
    """
    # Custom CSS for medical theme
    css = """
    .medical-header {
        background: linear-gradient(90deg, #2c5aa0 0%, #1e3a8a 100%);
        color: white;
        padding: 20px;
        border-radius: 10px;
        margin-bottom: 20px;
        text-align: center;
    }
    .warning-box {
        background-color: #fef3cd;
        border: 1px solid #ffeaa7;
        border-radius: 5px;
        padding: 15px;
        margin: 10px 0;
    }
    .emergency-notice {
        background-color: #f8d7da;
        border: 1px solid #f5c6cb;
        border-radius: 5px;
        padding: 15px;
        margin: 10px 0;
        font-weight: bold;
    }
    """

    with gr.Blocks(css=css, title="Gaza First Aid Assistant") as interface:

        # Header (uses the .medical-header class defined above)
        gr.HTML("""
        <div class="medical-header">
            <h1>🏥 Gaza First Aid Assistant</h1>
            <p>Specialized Medical Guidance for Healthcare Workers in Gaza</p>
            <p>Enhanced with Offline Capabilities and Safety Validation</p>
        </div>
        """)

        # Emergency notice (uses the .emergency-notice class defined above)
        gr.HTML("""
        <div class="emergency-notice">
            🚨 EMERGENCY NOTICE: For life-threatening emergencies, seek immediate professional medical attention. This AI assistant provides guidance to support, not replace, medical professionals.
        </div>
        """)

        # Main interface
        with gr.Row():
            with gr.Column(scale=2):
                query_input = gr.Textbox(
                    label="Medical Question",
                    placeholder="Enter your first aid or medical question here...",
                    lines=3
                )

                submit_btn = gr.Button("Get Medical Guidance", variant="primary")

                # Example queries
                gr.Examples(
                    examples=[
                        "My patient is feeling dizzy, what do i do",
                        "How to treat a gun wound",
                        "How do i treat patients with stab wounds",
                        "How to treat injuries from shrapnel",
                        "How to treat a burn when clean water is limited?",
                        "What are the signs of infection in a wound?",
                        "How to stop severe bleeding with improvised materials?",
                        "What to do for someone with difficulty breathing?",
                        "How to treat dehydration in children?"
                    ],
                    inputs=query_input
                )

            with gr.Column(scale=3):
                response_output = gr.Textbox(
                    label="Medical Guidance",
                    lines=15,
                    max_lines=20
                )

        # Warning and disclaimer (uses the .warning-box class defined above)
        gr.HTML("""
        <div class="warning-box">
            <h3>⚠️ Important Medical Disclaimer</h3>
        </div>
        """)

        # Information about the system
        with gr.Accordion("About This System", open=False):
            # Markdown text is kept at column 0 so no line can be read as an
            # indented code block.
            gr.Markdown("""
### Gaza First Aid Assistant - Enhanced Version

This specialized medical AI assistant is designed specifically for healthcare workers in Gaza, incorporating:

- **Offline-First Architecture**: Reduced dependency on external services
- **Gaza-Specific Medical Knowledge**: WHO, ICRC, and MSF guidelines adapted for local conditions
- **Comprehensive Safety Validation**: Multiple layers of medical fact-checking
- **Resource-Aware Guidance**: Considers limited supplies and infrastructure
- **Conflict-Adapted Protocols**: Medical advice tailored for conflict environments

**Knowledge Sources:**
- World Health Organization (WHO) Burn Prevention and Care Guidelines
- International Committee of the Red Cross (ICRC) War Surgery Manuals
- Médecins Sans Frontières (MSF) Field Guides
- Palestine Red Crescent Society (PRCS) Field Experience
- Standard First Aid and Emergency Medical Protocols

**Version**: 2.0 | **Last Updated**: July 2025
""")

        # Event handlers
        submit_btn.click(
            fn=process_medical_query,
            inputs=query_input,
            outputs=response_output
        )

        query_input.submit(
            fn=process_medical_query,
            inputs=query_input,
            outputs=response_output
        )

    return interface


def main():
    """Main application entry point.

    Builds the Gradio interface and serves it on all network interfaces at
    port 7860. Exits with status 1 if creation or launch raises.
    """
    logger.info("Starting Gaza First Aid Assistant...")

    try:
        # Create and launch interface
        interface = create_gradio_interface()

        # Launch with appropriate settings
        interface.launch(
            server_name="0.0.0.0",
            server_port=7860,
            share=False,  # Set to True for public sharing
            debug=False
        )

    except Exception as e:
        logger.error(f"Error launching application: {e}")
        sys.exit(1)


if __name__ == "__main__":
    main()