Spaces:

salvinjose
/

HNTAI

Paused

App Files Files Community

sachinchandrankallar committed on Aug 27

Commit

16f55db

1 Parent(s): c6f267d

optimization

Browse files

Files changed (4) hide show

ai_med_extract/agents/__pycache__/patient_summary_agent.cpython-311.pyc +0 -0
ai_med_extract/utils/__pycache__/model_loader_gguf.cpython-311.pyc +0 -0
ai_med_extract/utils/__pycache__/model_manager.cpython-311.pyc +0 -0
test_gguf_spaces.py +204 -103

ai_med_extract/agents/__pycache__/patient_summary_agent.cpython-311.pyc CHANGED Viewed

Binary files a/ai_med_extract/agents/__pycache__/patient_summary_agent.cpython-311.pyc and b/ai_med_extract/agents/__pycache__/patient_summary_agent.cpython-311.pyc differ

ai_med_extract/utils/__pycache__/model_loader_gguf.cpython-311.pyc CHANGED Viewed

Binary files a/ai_med_extract/utils/__pycache__/model_loader_gguf.cpython-311.pyc and b/ai_med_extract/utils/__pycache__/model_loader_gguf.cpython-311.pyc differ

ai_med_extract/utils/__pycache__/model_manager.cpython-311.pyc ADDED Viewed

Binary file (22 kB). View file

test_gguf_spaces.py CHANGED Viewed

@@ -1,148 +1,249 @@
 #!/usr/bin/env python3
 """
-Test script for GGUF model in Hugging Face Spaces with optimized settings
-This tests the ultra-conservative memory settings for Spaces
 """
 import os
 import sys
-import time
 import logging
 # Configure logging
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 logger = logging.getLogger(__name__)
-def test_gguf_spaces_optimization():
-    """Test GGUF model with Spaces-optimized settings"""
-    # Set environment variables for Hugging Face Spaces
-    os.environ['HF_HOME'] = '/tmp/huggingface'
-    os.environ['SPACE_ID'] = 'test_space'  # Simulate being in a Space
-    os.environ['GGUF_N_THREADS'] = '1'
-    os.environ['GGUF_N_BATCH'] = '16'
     try:
-        logger.info("Testing GGUF model with Spaces optimization...")
-        # Test the exact model name from your API call
-        model_name = "microsoft/Phi-3-mini-4k-instruct-gguf"
         filename = "Phi-3-mini-4k-instruct-q4.gguf"
-        logger.info(f"Model: {model_name}")
-        logger.info(f"Filename: {filename}")
-        logger.info("Environment: Simulating Hugging Face Space")
-        # Test import
-        try:
-            from ai_med_extract.utils.model_loader_gguf import GGUFModelPipeline
-            logger.info("✓ GGUFModelPipeline import successful")
-        except ImportError as e:
-            logger.error(f"✗ Failed to import GGUFModelPipeline: {e}")
-            return False
-        # Test model loading with timeout
-        start_time = time.time()
-        try:
-            pipeline = GGUFModelPipeline(model_name, filename, timeout=300)
-            load_time = time.time() - start_time
-            logger.info(f"✓ Model loaded successfully in {load_time:.2f}s")
-            # Check if Spaces optimization was applied
-            if hasattr(pipeline, 'model'):
-                model = pipeline.model
-                logger.info(f"✓ Context window: {getattr(model, 'n_ctx', 'N/A')}")
-                logger.info(f"✓ Threads: {getattr(model, 'n_threads', 'N/A')}")
-                logger.info(f"✓ Batch size: {getattr(model, 'n_batch', 'N/A')}")
-        except Exception as e:
-            load_time = time.time() - start_time
-            logger.error(f"✗ Model loading failed after {load_time:.2f}s: {e}")
-            return False
-        # Test basic generation with reduced tokens
-        try:
-            test_prompt = "Generate a brief medical summary: Patient has fever and cough."
-            logger.info("Testing basic generation with reduced tokens...")
-            start_gen = time.time()
-            result = pipeline.generate(test_prompt, max_tokens=50)  # Reduced from 100
-            gen_time = time.time() - start_gen
-            logger.info(f"✓ Generation successful in {gen_time:.2f}s")
-            logger.info(f"Generated text length: {len(result)} characters")
-            logger.info(f"Sample output: {result[:100]}...")
-        except Exception as e:
-            logger.error(f"✗ Generation failed: {e}")
-            return False
-        # Test memory usage
-        try:
-            import psutil
-            process = psutil.Process()
-            memory_info = process.memory_info()
-            memory_mb = memory_info.rss / 1024 / 1024
-            logger.info(f"✓ Memory usage: {memory_mb:.1f} MB")
-            if memory_mb > 8000:  # 8GB warning
-                logger.warning(f"⚠ High memory usage: {memory_mb:.1f} MB")
-            else:
-                logger.info("✓ Memory usage within acceptable limits")
-        except ImportError:
-            logger.info("⚠ psutil not available - cannot check memory usage")
-        logger.info("🎉 All tests passed! GGUF model is optimized for Spaces.")
-        return True
     except Exception as e:
-        logger.error(f"✗ Test failed with unexpected error: {e}")
-        return False
-def test_fallback_pipeline():
-    """Test the fallback pipeline when GGUF fails"""
     try:
-        logger.info("Testing fallback pipeline...")
-        from ai_med_extract.utils.model_loader_gguf import create_fallback_pipeline
-        fallback = create_fallback_pipeline()
-        result = fallback.generate("Test prompt")
-        logger.info(f"✓ Fallback pipeline working: {len(result)} characters generated")
-        return True
     except Exception as e:
-        logger.error(f"✗ Fallback pipeline failed: {e}")
-        return False
 def main():
     """Main test function"""
-    logger.info("Starting GGUF Spaces optimization tests...")
-    # Test 1: GGUF model with Spaces optimization
-    gguf_success = test_gguf_spaces_optimization()
-    # Test 2: Fallback pipeline
-    fallback_success = test_fallback_pipeline()
     # Summary
-    logger.info("\n" + "="*60)
-    logger.info("SPACES OPTIMIZATION TEST SUMMARY")
-    logger.info("="*60)
-    logger.info(f"GGUF Spaces Optimization: {'✓ PASS' if gguf_success else '✗ FAIL'}")
-    logger.info(f"Fallback Pipeline: {'✓ PASS' if fallback_success else '✗ PASS'}")
-    if gguf_success:
-        logger.info("🎉 GGUF model is optimized for Hugging Face Spaces!")
-        logger.info("Your API should work without 500 errors.")
-        logger.info("Memory usage has been optimized for containerized environments.")
     else:
-        logger.warning("⚠️  GGUF model still has issues. The fallback will be used.")
-        logger.info("Your API will still work but with reduced functionality.")
-    return gguf_success
 if __name__ == "__main__":
     success = main()

 #!/usr/bin/env python3
 """
+Test script for GGUF models on Hugging Face Spaces
+Specifically tests the patient summary generation with GGUF models
 """
 import os
 import sys
 import logging
+import time
 # Configure logging
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 logger = logging.getLogger(__name__)
+# Set environment variables for Hugging Face Spaces
+os.environ['HF_HOME'] = '/tmp/huggingface'
+os.environ['GGUF_N_THREADS'] = '1'  # Single thread for Spaces
+os.environ['GGUF_N_BATCH'] = '16'   # Small batch size for Spaces
+def test_gguf_model_direct():
+    """Test GGUF model loading directly"""
+    logger.info("🧪 Testing GGUF Model Loading Directly...")
     try:
+        from ai_med_extract.utils.model_loader_gguf import GGUFModelPipeline
+        # Test with the specific model and filename
+        model_repo = "microsoft/Phi-3-mini-4k-instruct-gguf"
         filename = "Phi-3-mini-4k-instruct-q4.gguf"
+        logger.info(f"Loading GGUF model: {model_repo}/{filename}")
+        # Create pipeline directly
+        pipeline = GGUFModelPipeline(model_repo, filename)
+        # Test generation
+        prompt = "Generate a brief medical summary: Patient has fever and cough."
+        result = pipeline.generate(prompt, max_tokens=100)
+        logger.info(f"✅ Direct GGUF test successful: {len(result)} characters")
+        logger.info(f"Sample output: {result[:200]}...")
+        return True, result
+    except Exception as e:
+        logger.error(f"❌ Direct GGUF test failed: {e}")
+        return False, str(e)
+def test_gguf_via_model_manager():
+    """Test GGUF model via the unified model manager"""
+    logger.info("🧪 Testing GGUF Model via Model Manager...")
+    try:
+        sys.path.append('.')
+        from ai_med_extract.utils.model_manager import model_manager
+        # Get GGUF model loader
+        loader = model_manager.get_model_loader(
+            "microsoft/Phi-3-mini-4k-instruct-gguf",
+            "gguf",
+            "Phi-3-mini-4k-instruct-q4.gguf"
+        )
+        # Check if it's actually using GGUF or fallback
+        model_info = loader.get_model_info()
+        logger.info(f"Model info: {model_info}")
+        if model_info['type'] == 'gguf':
+            logger.info("✅ GGUF model loaded successfully via manager")
+            # Test generation
+            prompt = "Generate a brief medical summary: Patient has chest pain."
+            result = loader.generate(prompt, max_tokens=100)
+            logger.info(f"✅ GGUF generation via manager: {len(result)} characters")
+            logger.info(f"Sample output: {result[:200]}...")
+            return True, result
+        else:
+            logger.warning("⚠️ Model manager returned fallback instead of GGUF")
+            return False, "Fallback model used"
     except Exception as e:
+        logger.error(f"❌ GGUF via model manager test failed: {e}")
+        return False, str(e)
+def test_patient_summarizer_gguf():
+    """Test patient summarizer with GGUF model"""
+    logger.info("🧪 Testing Patient Summarizer with GGUF Model...")
     try:
+        sys.path.append('.')
+        from ai_med_extract.agents.patient_summary_agent import PatientSummarizerAgent
+        # Create agent with GGUF model
+        agent = PatientSummarizerAgent(
+            "microsoft/Phi-3-mini-4k-instruct-gguf",
+            "gguf",
+            "Phi-3-mini-4k-instruct-q4.gguf"
+        )
+        # Sample patient data
+        sample_data = {
+            "result": {
+                "patientname": "John Doe",
+                "patientnumber": "12345",
+                "agey": "45",
+                "gender": "Male",
+                "allergies": ["Penicillin"],
+                "social_history": "Non-smoker, occasional alcohol",
+                "past_medical_history": ["Hypertension", "Diabetes"],
+                "encounters": [
+                    {
+                        "visit_date": "2024-01-15",
+                        "chief_complaint": "Chest pain",
+                        "symptoms": "Sharp chest pain, shortness of breath",
+                        "diagnosis": ["Angina", "Hypertension"],
+                        "dr_notes": "Patient reports chest pain for 2 days",
+                        "vitals": {"BP": "140/90", "HR": "85", "SpO2": "98%"},
+                        "medications": ["Aspirin", "Metoprolol"],
+                        "treatment": "Prescribed medications, follow-up in 1 week"
+                    }
+                ]
+            }
+        }
+        # Generate clinical summary
+        logger.info("Generating clinical summary...")
+        summary = agent.generate_clinical_summary(sample_data)
+        logger.info(f"✅ Patient summary generated: {len(summary)} characters")
+        logger.info(f"Summary preview: {summary[:300]}...")
+        return True, summary
     except Exception as e:
+        logger.error(f"❌ Patient summarizer GGUF test failed: {e}")
+        return False, str(e)
+def test_huggingface_spaces_optimization():
+    """Test Hugging Face Spaces optimization features"""
+    logger.info("🧪 Testing Hugging Face Spaces Optimization...")
+    try:
+        # Check if we're in a Hugging Face Space
+        is_hf_space = os.environ.get('SPACE_ID') is not None
+        if is_hf_space:
+            logger.info("🔄 Detected Hugging Face Space - testing optimization...")
+            # Test with ultra-conservative settings
+            os.environ['GGUF_N_THREADS'] = '1'
+            os.environ['GGUF_N_BATCH'] = '16'
+            # Test model loading with optimized settings
+            from ai_med_extract.utils.model_loader_gguf import GGUFModelPipeline
+            pipeline = GGUFModelPipeline(
+                "microsoft/Phi-3-mini-4k-instruct-gguf",
+                "Phi-3-mini-4k-instruct-q4.gguf"
+            )
+            # Quick test
+            result = pipeline.generate("Test prompt", max_tokens=50)
+            logger.info(f"✅ Spaces optimization test passed: {len(result)} characters")
+            return True, "Spaces optimization working"
+        else:
+            logger.info("🔄 Local environment detected - spaces optimization not applicable")
+            return True, "Local environment"
+    except Exception as e:
+        logger.error(f"❌ Spaces optimization test failed: {e}")
+        return False, str(e)
 def main():
     """Main test function"""
+    logger.info("🚀 Starting GGUF Hugging Face Spaces Tests...")
+    logger.info("=" * 70)
+    test_results = []
+    # Run all tests
+    tests = [
+        ("Direct GGUF Loading", test_gguf_model_direct),
+        ("GGUF via Model Manager", test_gguf_via_model_manager),
+        ("Patient Summarizer GGUF", test_patient_summarizer_gguf),
+        ("Spaces Optimization", test_huggingface_spaces_optimization)
+    ]
+    for test_name, test_func in tests:
+        logger.info(f"\n🧪 Running {test_name} Test...")
+        try:
+            start_time = time.time()
+            result, output = test_func()
+            end_time = time.time()
+            test_results.append((test_name, result, end_time - start_time))
+            if result:
+                logger.info(f"✅ {test_name} PASSED in {end_time - start_time:.2f}s")
+            else:
+                logger.warning(f"⚠️ {test_name} FAILED in {end_time - start_time:.2f}s")
+                logger.warning(f"Output: {output}")
+        except Exception as e:
+            logger.error(f"❌ {test_name} test crashed: {e}")
+            test_results.append((test_name, False, 0))
     # Summary
+    logger.info("\n" + "=" * 70)
+    logger.info("📊 TEST SUMMARY")
+    logger.info("=" * 70)
+    passed = 0
+    total = len(test_results)
+    for test_name, result, duration in test_results:
+        status = "✅ PASS" if result else "❌ FAIL"
+        logger.info(f"{test_name}: {status} ({duration:.2f}s)")
+        if result:
+            passed += 1
+    logger.info(f"\nOverall: {passed}/{total} tests passed")
+    if passed == total:
+        logger.info("🎉 All tests passed! GGUF models are working perfectly on Hugging Face Spaces!")
+        logger.info("✨ You can now use GGUF models for patient summaries!")
+    else:
+        logger.warning(f"⚠️ {total - passed} tests failed. Check the logs above for details.")
+    # Recommendations for Spaces
+    logger.info("\n💡 RECOMMENDATIONS FOR HUGGING FACE SPACES:")
+    if passed >= total * 0.8:
+        logger.info("✅ System is ready for production use on Spaces")
+        logger.info("✅ GGUF models are optimized for memory constraints")
+        logger.info("✅ Patient summaries will work with real GGUF models")
+    elif passed >= total * 0.6:
+        logger.info("⚠️ System is mostly working but has some issues")
+        logger.info("⚠️ GGUF models may need configuration adjustments")
     else:
+        logger.error("❌ System has significant issues with GGUF models")
+        logger.error("❌ Review and fix failed tests before deployment")
+    return passed == total
 if __name__ == "__main__":
     success = main()