Spaces:

rivapereira123
/

firstaid

Sleeping

App Files Files Community

rivapereira123 committed on Jul 21

Commit

f2f4366

verified ·

1 Parent(s): fed12ea

Update core/fact_checker.py

Browse files

Files changed (1) hide show

core/fact_checker.py +82 -38

core/fact_checker.py CHANGED Viewed

@@ -1,87 +1,131 @@
 import re
-from typing import Dict, List, Any
 def clean_ocr_artifacts(text: str) -> str:
     text = re.sub(r'\s{2,}', ' ', text)
-    text = re.sub(r'(?<=[\.!?]\s)([eEoO])([A-Z][a-z]+)', r'\2', text)
     text = re.sub(r'\b[Aa]love\b', 'aloe', text)
     text = re.sub(r'\bRelevanci\b', 'Relevance', text)
     text = re.sub(r'\bAlove\b', 'Aloe', text)
     text = re.sub(r'\b[aA]dvice\b', 'advice', text)
     return text.strip()
 class MedicalFactChecker:
     def __init__(self):
         self.contraindications = self._load_contraindications()
         self.dosage_patterns = self._compile_dosage_patterns()
-        self.definitive_patterns = [re.compile(r, re.IGNORECASE) for r in [
-            r'always\s+(?:use|take|apply)',
-            r'never\s+(?:use|take|apply)',
-            r'will\s+(?:cure|heal|fix)',
-            r'guaranteed\s+to',
-            r'completely\s+(?:safe|effective)'
-        ]]
     def _load_contraindications(self) -> Dict[str, List[str]]:
         return {
             "aspirin": ["children under 16", "bleeding disorders", "stomach ulcers"],
             "ibuprofen": ["kidney disease", "heart failure", "stomach bleeding"],
             "hydrogen_peroxide": ["deep wounds", "closed wounds", "eyes"],
             "tourniquets": ["non-life-threatening bleeding", "without proper training"]
         }
     def _compile_dosage_patterns(self) -> List[re.Pattern]:
         patterns = [
-            r'\d+\s*mg\b',
-            r'\d+\s*g\b',
-            r'\d+\s*ml\b',
-            r'\d+\s*tablets?\b',
-            r'\d+\s*times?\s+(?:per\s+)?day\b',
-            r'every\s+\d+\s+hours?\b'
         ]
-        return [re.compile(p, re.IGNORECASE) for p in patterns]
     def check_medical_accuracy(self, response: str, context: str) -> Dict[str, Any]:
         issues = []
         warnings = []
         accuracy_score = 0.0
         response_lower = response.lower()
-        for med, contraindications in self.contraindications.items():
-            if med in response_lower:
-                for item in contraindications:
-                    if any(word in response_lower for word in item.split()):
-                        issues.append(f"Contraindication: {med} with {item}")
                         accuracy_score -= 0.3
                         break
         if context:
             resp_words = set(response_lower.split())
             ctx_words = set(context.lower().split())
             context_similarity = len(resp_words & ctx_words) / len(resp_words | ctx_words) if ctx_words else 0.0
-            if context_similarity < 0.5:
-                warnings.append(f"Low context match: {context_similarity:.2f}")
                 accuracy_score -= 0.1
         for pattern in self.definitive_patterns:
             if pattern.search(response):
-                issues.append("Definitive claim detected")
                 accuracy_score -= 0.4
                 break
         for pattern in self.dosage_patterns:
             if pattern.search(response):
-                warnings.append("Dosage detected – verify with expert")
                 accuracy_score -= 0.1
                 break
-        confidence = max(0.0, min(1.0, 0.8 + accuracy_score))
         return {
-            "confidence_score": confidence,
             "issues": issues,
             "warnings": warnings,
-            "context_similarity": context_similarity if context else 0.0,
-            "is_safe": len(issues) == 0 and confidence > 0.5
         }

 import re
+from typing import Dict, Any, List
 def clean_ocr_artifacts(text: str) -> str:
     """Remove known OCR artifacts from *text* and normalise its whitespace.

     The clean-up steps, applied in order, are:

     1. Collapse every run of two or more whitespace characters into a
        single space.
     2. Drop a stray ``e``/``E``/``o``/``O`` glued to the front of a
        capitalised word that follows sentence punctuation and one
        whitespace character (``". eFlood"`` -> ``". Flood"``).
     3. Repair the misread words ``alove``/``Alove`` (aloe) and
        ``Relevanci`` (Relevance).
     4. Lower-case the standalone word ``Advice``.
     5. Strip leading and trailing whitespace.

     Args:
         text: Raw text, typically extracted via OCR.

     Returns:
         The cleaned text. Empty or ``None`` input yields ``""``.
     """
     if not text:
         return ""

     text = re.sub(r'\s{2,}', ' ', text)
     text = re.sub(r'(?<=[\.\?!]\s)([eEoO])([A-Z][a-z]+)', r'\2', text)  # eFlood → Flood, oSeek → Seek
     # Keep the case of the first letter: "Alove" -> "Aloe", "alove" -> "aloe".
     # Previously a lower-casing rule ran first, which made the dedicated
     # "Alove" -> "Aloe" rule unreachable.
     text = re.sub(r'\b([Aa])love\b', r'\1loe', text)
     text = re.sub(r'\bRelevanci\b', 'Relevance', text)
     # NOTE: this also lower-cases "Advice" at the start of a sentence.
     text = re.sub(r'\b[aA]dvice\b', 'advice', text)
     return text.strip()
 class MedicalFactChecker:
     """Heuristic safety checker for generated first-aid responses.

     A response is scored with keyword and regex heuristics only:
     contraindication keywords, word overlap with the supplied context,
     mentions of possibly unavailable resources, over-confident
     ("definitive") phrasing and explicit dosages. Each triggered heuristic
     subtracts a fixed penalty from a baseline confidence.
     """

     # Confidence assigned to a response before any penalty is applied.
     _BASE_CONFIDENCE = 0.8
     # A Jaccard overlap with the context below this value adds a warning.
     _MIN_CONTEXT_SIMILARITY = 0.5
     # Mentioning any of these adds a resource-limitation warning.
     _RESOURCE_KEYWORDS = ("clean water", "sterile", "hospital", "ambulance", "electricity")

     def __init__(self):
         self.medical_facts = self._load_medical_facts()
         self.contraindications = self._load_contraindications()
         self.dosage_patterns = self._compile_dosage_patterns()
         # Phrases that state an outcome or rule with unwarranted certainty.
         self.definitive_patterns = [
             re.compile(r, re.IGNORECASE) for r in [
                 r'always\s+(?:use|take|apply)',
                 r'never\s+(?:use|take|apply)',
                 r'will\s+(?:cure|heal|fix)',
                 r'guaranteed\s+to',
                 r'completely\s+(?:safe|effective)'
             ]
         ]

     def _load_medical_facts(self) -> Dict[str, Any]:
         """Return the built-in first-aid facts, grouped by topic.

         The mapping is stored on ``self.medical_facts``; no other method of
         this class reads it.
         """
         return {
             "burn_treatment": {
                 "cool_water": "Use clean, cool (not ice-cold) water for 10-20 minutes",
                 "no_ice": "Never apply ice directly to burns",
                 "clean_cloth": "Cover with clean, dry cloth if available"
             },
             "wound_care": {
                 "pressure": "Apply direct pressure to control bleeding",
                 "elevation": "Elevate injured limb if possible",
                 "clean_hands": "Clean hands before treating wounds when possible"
             },
             "infection_signs": {
                 "redness": "Increasing redness around wound",
                 "warmth": "Increased warmth at wound site",
                 "pus": "Yellow or green discharge",
                 "fever": "Fever may indicate systemic infection"
             }
         }

     def _load_contraindications(self) -> Dict[str, List[str]]:
         """Return a mapping of treatment name to its contraindication phrases."""
         return {
             "aspirin": ["children under 16", "bleeding disorders", "stomach ulcers"],
             "ibuprofen": ["kidney disease", "heart failure", "stomach bleeding"],
             "hydrogen_peroxide": ["deep wounds", "closed wounds", "eyes"],
             "tourniquets": ["non-life-threatening bleeding", "without proper training"]
         }

     def _compile_dosage_patterns(self) -> List[re.Pattern]:
         """Return case-insensitive regexes that recognise dosage statements."""
         patterns = [
             r'\d+\s*mg\b',  # milligrams
             r'\d+\s*g\b',   # grams
             r'\d+\s*ml\b',  # milliliters
             r'\d+\s*tablets?\b',  # tablets
             r'\d+\s*times?\s+(?:per\s+)?day\b',  # frequency
             r'every\s+\d+\s+hours?\b'  # intervals
         ]
         return [re.compile(pattern, re.IGNORECASE) for pattern in patterns]

     @staticmethod
     def _mentions_medication(medication: str, text_lower: str) -> bool:
         """Return True if *text_lower* names *medication*.

         Keys such as ``hydrogen_peroxide`` use underscores, which do not
         occur in ordinary prose, so the space-separated spelling is
         accepted as well as the literal key.
         """
         return medication in text_lower or medication.replace("_", " ") in text_lower

     def check_medical_accuracy(self, response: str, context: str) -> Dict[str, Any]:
         """Score *response* for medical safety using keyword heuristics.

         Penalties are subtracted from a baseline confidence of 0.8:

         * 0.3 for each treatment mentioned together with any word of one of
           its contraindication phrases;
         * 0.1 when the Jaccard word overlap with *context* is below 0.5;
         * 0.05 when a resource keyword (e.g. "hospital") is mentioned;
         * 0.4 when a definitive claim pattern matches;
         * 0.1 when a dosage pattern matches.

         Args:
             response: Generated answer to check; ``None`` is treated as "".
             context: Reference text the answer should be grounded in. A
                 falsy value skips the similarity check.

         Returns:
             A dict with ``confidence_score`` (clamped to [0.0, 1.0]),
             ``issues``, ``warnings``, ``context_similarity`` and
             ``is_safe`` (True only with no issues and confidence > 0.5).
         """
         if response is None:
             response = ""

         issues: List[str] = []
         warnings: List[str] = []
         accuracy_score = 0.0
         response_lower = response.lower()

         # Contraindications: substring keyword matching, at most one issue
         # per treatment. NOTE: matching any single word of a phrase is
         # deliberately coarse and can over-report.
         for medication, contra_list in self.contraindications.items():
             if not self._mentions_medication(medication, response_lower):
                 continue
             for contra in contra_list:
                 if any(word in response_lower for word in contra.split()):
                     issues.append(f"Potential contraindication: {medication} with {contra}")
                     accuracy_score -= 0.3
                     break

         # Context alignment using Jaccard similarity over whitespace tokens.
         context_similarity = 0.0
         if context:
             resp_words = set(response_lower.split())
             ctx_words = set(context.lower().split())
             if ctx_words:
                 context_similarity = len(resp_words & ctx_words) / len(resp_words | ctx_words)
             if context_similarity < self._MIN_CONTEXT_SIMILARITY:
                 warnings.append(f"Low context similarity: {context_similarity:.2f}")
                 accuracy_score -= 0.1

         # Gaza-specific resource checks: the advice may rely on resources
         # that are not available.
         if any(resource in response_lower for resource in self._RESOURCE_KEYWORDS):
             warnings.append("Consider resource limitations in Gaza context")
             accuracy_score -= 0.05

         # Unsupported claims check: penalised once, however many patterns match.
         if any(pattern.search(response) for pattern in self.definitive_patterns):
             issues.append("Unsupported definitive claim detected")
             accuracy_score -= 0.4

         # Dosage validation: penalised once, however many patterns match.
         if any(pattern.search(response) for pattern in self.dosage_patterns):
             warnings.append("Dosage detected - verify with professional")
             accuracy_score -= 0.1

         confidence_score = max(0.0, min(1.0, self._BASE_CONFIDENCE + accuracy_score))
         return {
             "confidence_score": confidence_score,
             "issues": issues,
             "warnings": warnings,
             "context_similarity": context_similarity,
             "is_safe": len(issues) == 0 and confidence_score > 0.5
         }