#!/usr/bin/env python3
"""
Test script for GGUF models on Hugging Face Spaces
Specifically tests the patient summary generation with GGUF models

Run it as a script: every check is executed in turn, a summary is logged,
and the process exits with status 0 only when all checks passed.
"""

import os
import sys
import logging
import time

# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# Set environment variables for Hugging Face Spaces
os.environ['HF_HOME'] = '/tmp/huggingface'
os.environ['GGUF_N_THREADS'] = '1'  # Single thread for Spaces
os.environ['GGUF_N_BATCH'] = '16'   # Small batch size for Spaces

# Model under test, shared by every check below.
_MODEL_REPO = "microsoft/Phi-3-mini-4k-instruct-gguf"
_MODEL_FILENAME = "Phi-3-mini-4k-instruct-q4.gguf"


def _script_check(func):
    """Mark *func* as a script-level check that pytest must not collect.

    The checks below are named ``test_*`` but report their outcome by
    returning an ``(ok, detail)`` tuple and never raise, so running them as
    pytest tests would be meaningless. The function object is returned
    unchanged apart from the ``__test__`` attribute.
    """
    func.__test__ = False
    return func


def _ensure_cwd_on_path():
    """Append the current directory to ``sys.path`` once, if it is missing."""
    if '.' not in sys.path:
        sys.path.append('.')


@_script_check
def test_gguf_model_direct():
    """Test GGUF model loading directly.

    Returns:
        tuple: ``(True, generated_text)`` on success, or
        ``(False, error_message)`` when anything raised.
    """
    logger.info("🧪 Testing GGUF Model Loading Directly...")

    try:
        _ensure_cwd_on_path()
        from ai_med_extract.utils.model_loader_gguf import GGUFModelPipeline

        # Test with the specific model and filename
        model_repo = _MODEL_REPO
        filename = _MODEL_FILENAME

        logger.info(f"Loading GGUF model: {model_repo}/{filename}")

        # Create pipeline directly
        pipeline = GGUFModelPipeline(model_repo, filename)

        # Test generation
        prompt = "Generate a brief medical summary: Patient has fever and cough."
        result = pipeline.generate(prompt, max_tokens=100)

        logger.info(f"✅ Direct GGUF test successful: {len(result)} characters")
        logger.info(f"Sample output: {result[:200]}...")

        return True, result

    except Exception as e:
        # Top-level boundary of the check: log with traceback, report failure.
        logger.exception(f"❌ Direct GGUF test failed: {e}")
        return False, str(e)


@_script_check
def test_gguf_via_model_manager():
    """Test GGUF model via the unified model manager.

    Returns:
        tuple: ``(True, generated_text)`` when the manager reports a loader
        of type ``'gguf'`` and generation succeeds;
        ``(False, "Fallback model used")`` when the reported type differs;
        ``(False, error_message)`` when anything raised.
    """
    logger.info("🧪 Testing GGUF Model via Model Manager...")

    try:
        _ensure_cwd_on_path()
        from ai_med_extract.utils.model_manager import model_manager

        # Get GGUF model loader
        loader = model_manager.get_model_loader(
            _MODEL_REPO,
            "gguf",
            _MODEL_FILENAME
        )

        # Check if it's actually using GGUF or fallback
        model_info = loader.get_model_info()
        logger.info(f"Model info: {model_info}")

        if model_info['type'] != 'gguf':
            logger.warning("⚠️ Model manager returned fallback instead of GGUF")
            return False, "Fallback model used"

        logger.info("✅ GGUF model loaded successfully via manager")

        # Test generation
        prompt = "Generate a brief medical summary: Patient has chest pain."
        result = loader.generate(prompt, max_tokens=100)

        logger.info(f"✅ GGUF generation via manager: {len(result)} characters")
        logger.info(f"Sample output: {result[:200]}...")

        return True, result

    except Exception as e:
        logger.exception(f"❌ GGUF via model manager test failed: {e}")
        return False, str(e)


@_script_check
def test_patient_summarizer_gguf():
    """Test patient summarizer with GGUF model.

    Builds a ``PatientSummarizerAgent`` for the GGUF model and asks it for a
    clinical summary of one hard-coded sample patient record.

    Returns:
        tuple: ``(True, summary)`` on success, or ``(False, error_message)``
        when anything raised.
    """
    logger.info("🧪 Testing Patient Summarizer with GGUF Model...")

    try:
        _ensure_cwd_on_path()
        from ai_med_extract.agents.patient_summary_agent import PatientSummarizerAgent

        # Create agent with GGUF model
        agent = PatientSummarizerAgent(
            _MODEL_REPO,
            "gguf",
            _MODEL_FILENAME
        )

        # Sample patient data
        sample_data = {
            "result": {
                "patientname": "John Doe",
                "patientnumber": "12345",
                "agey": "45",
                "gender": "Male",
                "allergies": ["Penicillin"],
                "social_history": "Non-smoker, occasional alcohol",
                "past_medical_history": ["Hypertension", "Diabetes"],
                "encounters": [
                    {
                        "visit_date": "2024-01-15",
                        "chief_complaint": "Chest pain",
                        "symptoms": "Sharp chest pain, shortness of breath",
                        "diagnosis": ["Angina", "Hypertension"],
                        "dr_notes": "Patient reports chest pain for 2 days",
                        "vitals": {"BP": "140/90", "HR": "85", "SpO2": "98%"},
                        "medications": ["Aspirin", "Metoprolol"],
                        "treatment": "Prescribed medications, follow-up in 1 week"
                    }
                ]
            }
        }

        # Generate clinical summary
        logger.info("Generating clinical summary...")
        summary = agent.generate_clinical_summary(sample_data)

        logger.info(f"✅ Patient summary generated: {len(summary)} characters")
        logger.info(f"Summary preview: {summary[:300]}...")

        return True, summary

    except Exception as e:
        logger.exception(f"❌ Patient summarizer GGUF test failed: {e}")
        return False, str(e)


@_script_check
def test_huggingface_spaces_optimization():
    """Test Hugging Face Spaces optimization features.

    The model is only loaded when the ``SPACE_ID`` environment variable is
    set; otherwise the check is reported as passed without doing any work.

    Returns:
        tuple: ``(True, "Spaces optimization working")`` inside a Space,
        ``(True, "Local environment")`` outside one, or
        ``(False, error_message)`` when anything raised.
    """
    logger.info("🧪 Testing Hugging Face Spaces Optimization...")

    try:
        # Check if we're in a Hugging Face Space
        is_hf_space = os.environ.get('SPACE_ID') is not None

        if not is_hf_space:
            logger.info("🔄 Local environment detected - spaces optimization not applicable")
            return True, "Local environment"

        logger.info("🔄 Detected Hugging Face Space - testing optimization...")

        # Test with ultra-conservative settings
        os.environ['GGUF_N_THREADS'] = '1'
        os.environ['GGUF_N_BATCH'] = '16'

        # Test model loading with optimized settings
        _ensure_cwd_on_path()
        from ai_med_extract.utils.model_loader_gguf import GGUFModelPipeline

        pipeline = GGUFModelPipeline(
            _MODEL_REPO,
            _MODEL_FILENAME
        )

        # Quick test
        result = pipeline.generate("Test prompt", max_tokens=50)
        logger.info(f"✅ Spaces optimization test passed: {len(result)} characters")

        return True, "Spaces optimization working"

    except Exception as e:
        logger.exception(f"❌ Spaces optimization test failed: {e}")
        return False, str(e)


def main():
    """Main test function.

    Runs every check, logs a per-check result with its duration, then logs
    an overall summary and deployment recommendations.

    Returns:
        bool: ``True`` when every check passed, ``False`` otherwise.
    """
    logger.info("🚀 Starting GGUF Hugging Face Spaces Tests...")
    logger.info("=" * 70)

    test_results = []

    # Run all tests
    tests = [
        ("Direct GGUF Loading", test_gguf_model_direct),
        ("GGUF via Model Manager", test_gguf_via_model_manager),
        ("Patient Summarizer GGUF", test_patient_summarizer_gguf),
        ("Spaces Optimization", test_huggingface_spaces_optimization),
    ]

    for test_name, test_func in tests:
        logger.info(f"\n🧪 Running {test_name} Test...")
        start_time = time.time()
        try:
            result, output = test_func()
        except Exception as e:
            # Keep the real elapsed time so a crash still shows how long it ran.
            elapsed = time.time() - start_time
            logger.exception(f"❌ {test_name} test crashed: {e}")
            test_results.append((test_name, False, elapsed))
            continue

        elapsed = time.time() - start_time
        test_results.append((test_name, result, elapsed))

        if result:
            logger.info(f"✅ {test_name} PASSED in {elapsed:.2f}s")
        else:
            logger.warning(f"⚠️ {test_name} FAILED in {elapsed:.2f}s")
            logger.warning(f"Output: {output}")

    # Summary
    logger.info("\n" + "=" * 70)
    logger.info("📊 TEST SUMMARY")
    logger.info("=" * 70)

    passed = 0
    total = len(test_results)

    for test_name, result, duration in test_results:
        status = "✅ PASS" if result else "❌ FAIL"
        logger.info(f"{test_name}: {status} ({duration:.2f}s)")
        if result:
            passed += 1

    logger.info(f"\nOverall: {passed}/{total} tests passed")

    if passed == total:
        logger.info("🎉 All tests passed! GGUF models are working perfectly on Hugging Face Spaces!")
        logger.info("✨ You can now use GGUF models for patient summaries!")
    else:
        logger.warning(f"⚠️ {total - passed} tests failed. Check the logs above for details.")

    # Recommendations for Spaces
    logger.info("\n💡 RECOMMENDATIONS FOR HUGGING FACE SPACES:")
    if passed >= total * 0.8:
        logger.info("✅ System is ready for production use on Spaces")
        logger.info("✅ GGUF models are optimized for memory constraints")
        logger.info("✅ Patient summaries will work with real GGUF models")
    elif passed >= total * 0.6:
        logger.info("⚠️ System is mostly working but has some issues")
        logger.info("⚠️ GGUF models may need configuration adjustments")
    else:
        logger.error("❌ System has significant issues with GGUF models")
        logger.error("❌ Review and fix failed tests before deployment")

    return passed == total


if __name__ == "__main__":
    success = main()
    sys.exit(0 if success else 1)