Spaces:

salvinjose
/

HNTAI

Paused

File size: 9,440 Bytes

#!/usr/bin/env python3
"""
Test script for GGUF models on Hugging Face Spaces
Specifically tests the patient summary generation with GGUF models
"""

import os
import sys
import logging
import time

# Logging: timestamped INFO-level records; every test below reports through
# this module-level logger.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

# Environment for Hugging Face Spaces. These are set at import time, before
# any test function performs its (lazy) project imports.
# NOTE(review): GGUF_N_THREADS / GGUF_N_BATCH are presumably read by the
# ai_med_extract GGUF loader — confirm against that module.
os.environ.update({
    'HF_HOME': '/tmp/huggingface',
    'GGUF_N_THREADS': '1',   # Single thread for Spaces
    'GGUF_N_BATCH': '16',    # Small batch size for Spaces
})

def test_gguf_model_direct():
    """Load a GGUF model through ``GGUFModelPipeline`` and run one generation.

    The project import is performed inside the ``try`` block, so a missing or
    broken ``ai_med_extract`` package is caught and reported as a failed test
    instead of aborting the script.

    Returns:
        tuple: ``(True, result)`` with the value returned by
        ``pipeline.generate`` on success, or ``(False, message)`` where
        ``message`` is ``str()`` of the exception that was raised.
    """
    logger.info("🧪 Testing GGUF Model Loading Directly...")

    try:
        from ai_med_extract.utils.model_loader_gguf import GGUFModelPipeline

        # Model repository and the specific quantized file to load.
        model_repo = "microsoft/Phi-3-mini-4k-instruct-gguf"
        filename = "Phi-3-mini-4k-instruct-q4.gguf"

        logger.info(f"Loading GGUF model: {model_repo}/{filename}")

        # Build the pipeline directly, bypassing the model manager.
        pipeline = GGUFModelPipeline(model_repo, filename)

        # Single short generation as a smoke test.
        # NOTE(review): the logging below assumes generate() returns a
        # sliceable value with a length (e.g. str) — confirm.
        prompt = "Generate a brief medical summary: Patient has fever and cough."
        result = pipeline.generate(prompt, max_tokens=100)

        logger.info(f"✅ Direct GGUF test successful: {len(result)} characters")
        logger.info(f"Sample output: {result[:200]}...")

        return True, result

    except Exception as e:
        # logger.exception logs at ERROR level and appends the traceback, so
        # the failing frame is visible in the logs, not just the message.
        logger.exception(f"❌ Direct GGUF test failed: {e}")
        return False, str(e)

def test_gguf_via_model_manager():
    """Load the GGUF model through ``model_manager`` and run one generation.

    Requests a loader from ``model_manager.get_model_loader`` and inspects
    ``loader.get_model_info()``. A generation is only attempted when the
    reported ``type`` is ``'gguf'``; any other type is treated as a fallback
    model and counted as a failure.

    Returns:
        tuple: ``(True, result)`` when the GGUF loader generated output,
        ``(False, "Fallback model used")`` when the manager returned a
        non-GGUF loader, or ``(False, message)`` with ``str()`` of the
        exception on any error.
    """
    logger.info("🧪 Testing GGUF Model via Model Manager...")

    try:
        # Make the current working directory importable. Only add it once so
        # repeated calls do not keep growing sys.path with duplicates.
        if '.' not in sys.path:
            sys.path.append('.')
        from ai_med_extract.utils.model_manager import model_manager

        # NOTE(review): positional arguments look like (repo id, model type,
        # filename) — confirm against get_model_loader's signature.
        loader = model_manager.get_model_loader(
            "microsoft/Phi-3-mini-4k-instruct-gguf",
            "gguf",
            "Phi-3-mini-4k-instruct-q4.gguf"
        )

        # Check whether the manager really produced a GGUF loader or a fallback.
        model_info = loader.get_model_info()
        logger.info(f"Model info: {model_info}")

        if model_info['type'] != 'gguf':
            logger.warning("⚠️ Model manager returned fallback instead of GGUF")
            return False, "Fallback model used"

        logger.info("✅ GGUF model loaded successfully via manager")

        # Single short generation as a smoke test.
        prompt = "Generate a brief medical summary: Patient has chest pain."
        result = loader.generate(prompt, max_tokens=100)

        logger.info(f"✅ GGUF generation via manager: {len(result)} characters")
        logger.info(f"Sample output: {result[:200]}...")

        return True, result

    except Exception as e:
        # logger.exception logs at ERROR level and appends the traceback, so
        # the failing frame is visible in the logs, not just the message.
        logger.exception(f"❌ GGUF via model manager test failed: {e}")
        return False, str(e)

def test_patient_summarizer_gguf():
    """Generate a clinical summary with ``PatientSummarizerAgent`` on GGUF.

    Builds the agent for the Phi-3 mini GGUF model, feeds it one hard-coded
    sample patient record and logs the length and a preview of the summary
    returned by ``agent.generate_clinical_summary``.

    Returns:
        tuple: ``(True, summary)`` on success, or ``(False, message)`` where
        ``message`` is ``str()`` of the exception that was raised.
    """
    logger.info("🧪 Testing Patient Summarizer with GGUF Model...")

    try:
        # Make the current working directory importable. Only add it once so
        # repeated calls do not keep growing sys.path with duplicates.
        if '.' not in sys.path:
            sys.path.append('.')
        from ai_med_extract.agents.patient_summary_agent import PatientSummarizerAgent

        # NOTE(review): positional arguments look like (repo id, model type,
        # filename) — confirm against PatientSummarizerAgent.__init__.
        agent = PatientSummarizerAgent(
            "microsoft/Phi-3-mini-4k-instruct-gguf",
            "gguf",
            "Phi-3-mini-4k-instruct-q4.gguf"
        )

        # Synthetic patient record: demographics, history and one encounter.
        sample_data = {
            "result": {
                "patientname": "John Doe",
                "patientnumber": "12345",
                "agey": "45",
                "gender": "Male",
                "allergies": ["Penicillin"],
                "social_history": "Non-smoker, occasional alcohol",
                "past_medical_history": ["Hypertension", "Diabetes"],
                "encounters": [
                    {
                        "visit_date": "2024-01-15",
                        "chief_complaint": "Chest pain",
                        "symptoms": "Sharp chest pain, shortness of breath",
                        "diagnosis": ["Angina", "Hypertension"],
                        "dr_notes": "Patient reports chest pain for 2 days",
                        "vitals": {"BP": "140/90", "HR": "85", "SpO2": "98%"},
                        "medications": ["Aspirin", "Metoprolol"],
                        "treatment": "Prescribed medications, follow-up in 1 week"
                    }
                ]
            }
        }

        logger.info("Generating clinical summary...")
        # NOTE(review): the logging below assumes the summary is a sliceable
        # value with a length (e.g. str) — confirm.
        summary = agent.generate_clinical_summary(sample_data)

        logger.info(f"✅ Patient summary generated: {len(summary)} characters")
        logger.info(f"Summary preview: {summary[:300]}...")

        return True, summary

    except Exception as e:
        # logger.exception logs at ERROR level and appends the traceback, so
        # the failing frame is visible in the logs, not just the message.
        logger.exception(f"❌ Patient summarizer GGUF test failed: {e}")
        return False, str(e)

def test_huggingface_spaces_optimization():
    """Run a quick GGUF generation with the conservative Spaces settings.

    The presence of the ``SPACE_ID`` environment variable is taken as the
    signal that the script runs inside a Hugging Face Space. Outside a Space
    nothing is loaded and the test is reported as passed.

    Returns:
        tuple: ``(True, "Spaces optimization working")`` after a successful
        generation inside a Space, ``(True, "Local environment")`` when
        ``SPACE_ID`` is not set, or ``(False, message)`` with ``str()`` of
        the exception on any error.
    """
    logger.info("🧪 Testing Hugging Face Spaces Optimization...")

    try:
        if 'SPACE_ID' not in os.environ:
            logger.info("🔄 Local environment detected - spaces optimization not applicable")
            return True, "Local environment"

        logger.info("🔄 Detected Hugging Face Space - testing optimization...")

        # Re-apply the ultra-conservative settings right before loading, in
        # case something changed them since module import.
        os.environ['GGUF_N_THREADS'] = '1'
        os.environ['GGUF_N_BATCH'] = '16'

        from ai_med_extract.utils.model_loader_gguf import GGUFModelPipeline

        pipeline = GGUFModelPipeline(
            "microsoft/Phi-3-mini-4k-instruct-gguf",
            "Phi-3-mini-4k-instruct-q4.gguf"
        )

        # Quick generation; only its length is reported.
        result = pipeline.generate("Test prompt", max_tokens=50)

        logger.info(f"✅ Spaces optimization test passed: {len(result)} characters")
        return True, "Spaces optimization working"

    except Exception as e:
        # logger.exception logs at ERROR level and appends the traceback, so
        # the failing frame is visible in the logs, not just the message.
        logger.exception(f"❌ Spaces optimization test failed: {e}")
        return False, str(e)

def main():
    """Run every GGUF test, log a summary and deployment recommendations.

    Each test function is expected to return a ``(passed, output)`` pair. A
    test that raises instead is recorded as failed, together with the time it
    ran before crashing.

    Returns:
        bool: ``True`` only when every test passed.
    """
    logger.info("🚀 Starting GGUF Hugging Face Spaces Tests...")
    logger.info("=" * 70)

    # (name, passed, duration in seconds) for every test, in execution order.
    test_results = []

    tests = [
        ("Direct GGUF Loading", test_gguf_model_direct),
        ("GGUF via Model Manager", test_gguf_via_model_manager),
        ("Patient Summarizer GGUF", test_patient_summarizer_gguf),
        ("Spaces Optimization", test_huggingface_spaces_optimization)
    ]

    for test_name, test_func in tests:
        logger.info(f"\n🧪 Running {test_name} Test...")
        # perf_counter is monotonic, so the measured duration cannot be
        # distorted by wall-clock adjustments while a (slow) test runs.
        start_time = time.perf_counter()
        try:
            result, output = test_func()
        except Exception as e:
            # Keep the real elapsed time and the traceback for crashed tests.
            elapsed = time.perf_counter() - start_time
            logger.exception(f"❌ {test_name} test crashed: {e}")
            test_results.append((test_name, False, elapsed))
            continue

        elapsed = time.perf_counter() - start_time
        test_results.append((test_name, result, elapsed))

        if result:
            logger.info(f"✅ {test_name} PASSED in {elapsed:.2f}s")
        else:
            logger.warning(f"⚠️ {test_name} FAILED in {elapsed:.2f}s")
            logger.warning(f"Output: {output}")

    # Summary
    logger.info("\n" + "=" * 70)
    logger.info("📊 TEST SUMMARY")
    logger.info("=" * 70)

    passed = 0
    total = len(test_results)

    for test_name, result, duration in test_results:
        status = "✅ PASS" if result else "❌ FAIL"
        logger.info(f"{test_name}: {status} ({duration:.2f}s)")
        if result:
            passed += 1

    logger.info(f"\nOverall: {passed}/{total} tests passed")

    if passed == total:
        logger.info("🎉 All tests passed! GGUF models are working perfectly on Hugging Face Spaces!")
        logger.info("✨ You can now use GGUF models for patient summaries!")
    else:
        logger.warning(f"⚠️ {total - passed} tests failed. Check the logs above for details.")

    # Recommendations for Spaces, tiered by pass ratio. With the four tests
    # above only 4/4 reaches the 80% tier; 3/4 lands in the 60% tier.
    logger.info("\n💡 RECOMMENDATIONS FOR HUGGING FACE SPACES:")
    if passed >= total * 0.8:
        logger.info("✅ System is ready for production use on Spaces")
        logger.info("✅ GGUF models are optimized for memory constraints")
        logger.info("✅ Patient summaries will work with real GGUF models")
    elif passed >= total * 0.6:
        logger.info("⚠️ System is mostly working but has some issues")
        logger.info("⚠️ GGUF models may need configuration adjustments")
    else:
        logger.error("❌ System has significant issues with GGUF models")
        logger.error("❌ Review and fix failed tests before deployment")

    return passed == total

if __name__ == "__main__":
    # Map the overall boolean outcome onto the process exit status:
    # 0 when every test passed, 1 otherwise.
    exit_status = 0 if main() else 1
    sys.exit(exit_status)