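# HNTAI / test_gguf_spaces.py
# Hugging Face file-viewer header (page residue, not part of the script):
#   author: sachinchandrankallar | commit: "optimization" (16f55db) | size: 9.44 kB
#   viewer links: raw / history / blame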
#!/usr/bin/env python3
"""
Test script for GGUF models on Hugging Face Spaces
Specifically tests the patient summary generation with GGUF models
"""
import os
import sys
import logging
import time
# Root logging: timestamped records at INFO level and above.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

# Environment overrides for Hugging Face Spaces, applied unconditionally at
# import time (any values already present are replaced).
os.environ.update({
    'HF_HOME': '/tmp/huggingface',
    'GGUF_N_THREADS': '1',   # single thread for Spaces
    'GGUF_N_BATCH': '16',    # small batch size for Spaces
})
def test_gguf_model_direct():
    """Load a GGUF model with ``GGUFModelPipeline`` and run one short generation.

    The project import lives inside the ``try`` so that a missing or broken
    ``ai_med_extract`` package is reported as a failed test instead of
    aborting the whole script.

    Returns:
        tuple: ``(True, result)`` where ``result`` is whatever
        ``pipeline.generate`` returned (presumably text -- it is measured with
        ``len`` and sliced for the preview), or ``(False, str(error))`` when
        any step raised.
    """
    logger.info("🧪 Testing GGUF Model Loading Directly...")
    try:
        from ai_med_extract.utils.model_loader_gguf import GGUFModelPipeline

        # Repository and quantized file exercised by this test.
        model_repo = "microsoft/Phi-3-mini-4k-instruct-gguf"
        filename = "Phi-3-mini-4k-instruct-q4.gguf"
        logger.info(f"Loading GGUF model: {model_repo}/{filename}")

        # Build the pipeline directly, bypassing the model manager.
        pipeline = GGUFModelPipeline(model_repo, filename)

        prompt = "Generate a brief medical summary: Patient has fever and cough."
        result = pipeline.generate(prompt, max_tokens=100)

        logger.info(f"✅ Direct GGUF test successful: {len(result)} characters")
        logger.info(f"Sample output: {result[:200]}...")
        return True, result
    except Exception as e:
        # Test boundary: report the failure to the caller instead of raising.
        # logger.exception logs at ERROR level and keeps the traceback.
        logger.exception(f"❌ Direct GGUF test failed: {e}")
        return False, str(e)
def test_gguf_via_model_manager():
    """Request a GGUF loader from ``model_manager`` and run one generation.

    The loader's ``get_model_info()`` is inspected first: the test only counts
    as passed when the reported ``'type'`` is ``'gguf'``; any other type is
    treated as the manager having handed back a fallback model.

    Returns:
        tuple: ``(True, result)`` with the value returned by
        ``loader.generate`` on success, ``(False, "Fallback model used")``
        when the loader is not a GGUF one, or ``(False, str(error))`` when any
        step raised.
    """
    logger.info("🧪 Testing GGUF Model via Model Manager...")
    try:
        # Make the current directory importable, without adding a duplicate
        # entry each time this function is called.
        if '.' not in sys.path:
            sys.path.append('.')
        from ai_med_extract.utils.model_manager import model_manager

        loader = model_manager.get_model_loader(
            "microsoft/Phi-3-mini-4k-instruct-gguf",
            "gguf",
            "Phi-3-mini-4k-instruct-q4.gguf",
        )

        # Check whether the manager really produced a GGUF loader.
        model_info = loader.get_model_info()
        logger.info(f"Model info: {model_info}")
        if model_info['type'] != 'gguf':
            logger.warning("⚠️ Model manager returned fallback instead of GGUF")
            return False, "Fallback model used"

        logger.info("✅ GGUF model loaded successfully via manager")

        prompt = "Generate a brief medical summary: Patient has chest pain."
        result = loader.generate(prompt, max_tokens=100)

        logger.info(f"✅ GGUF generation via manager: {len(result)} characters")
        logger.info(f"Sample output: {result[:200]}...")
        return True, result
    except Exception as e:
        # Test boundary: report the failure to the caller instead of raising.
        # logger.exception logs at ERROR level and keeps the traceback.
        logger.exception(f"❌ GGUF via model manager test failed: {e}")
        return False, str(e)
def test_patient_summarizer_gguf():
    """Build a ``PatientSummarizerAgent`` on a GGUF model and summarize a record.

    A single hard-coded sample patient (one encounter) is passed to
    ``agent.generate_clinical_summary``.

    Returns:
        tuple: ``(True, summary)`` with the value returned by the agent
        (presumably text -- it is measured with ``len`` and sliced for the
        preview), or ``(False, str(error))`` when any step raised.
    """
    logger.info("🧪 Testing Patient Summarizer with GGUF Model...")
    try:
        # Make the current directory importable, without adding a duplicate
        # entry each time this function is called.
        if '.' not in sys.path:
            sys.path.append('.')
        from ai_med_extract.agents.patient_summary_agent import PatientSummarizerAgent

        agent = PatientSummarizerAgent(
            "microsoft/Phi-3-mini-4k-instruct-gguf",
            "gguf",
            "Phi-3-mini-4k-instruct-q4.gguf",
        )

        # Sample patient payload: demographics and history plus one encounter.
        sample_data = {
            "result": {
                "patientname": "John Doe",
                "patientnumber": "12345",
                "agey": "45",
                "gender": "Male",
                "allergies": ["Penicillin"],
                "social_history": "Non-smoker, occasional alcohol",
                "past_medical_history": ["Hypertension", "Diabetes"],
                "encounters": [
                    {
                        "visit_date": "2024-01-15",
                        "chief_complaint": "Chest pain",
                        "symptoms": "Sharp chest pain, shortness of breath",
                        "diagnosis": ["Angina", "Hypertension"],
                        "dr_notes": "Patient reports chest pain for 2 days",
                        "vitals": {"BP": "140/90", "HR": "85", "SpO2": "98%"},
                        "medications": ["Aspirin", "Metoprolol"],
                        "treatment": "Prescribed medications, follow-up in 1 week",
                    },
                ],
            },
        }

        logger.info("Generating clinical summary...")
        summary = agent.generate_clinical_summary(sample_data)

        logger.info(f"✅ Patient summary generated: {len(summary)} characters")
        logger.info(f"Summary preview: {summary[:300]}...")
        return True, summary
    except Exception as e:
        # Test boundary: report the failure to the caller instead of raising.
        # logger.exception logs at ERROR level and keeps the traceback.
        logger.exception(f"❌ Patient summarizer GGUF test failed: {e}")
        return False, str(e)
def test_huggingface_spaces_optimization():
    """Exercise the GGUF pipeline with the conservative Spaces settings.

    Running inside a Hugging Face Space is detected by the presence of the
    ``SPACE_ID`` environment variable. Outside a Space nothing is loaded and
    the test passes trivially.

    Returns:
        tuple[bool, str]: ``(True, "Spaces optimization working")`` after a
        successful generation inside a Space, ``(True, "Local environment")``
        when ``SPACE_ID`` is not set, or ``(False, str(error))`` when any step
        raised.
    """
    logger.info("🧪 Testing Hugging Face Spaces Optimization...")
    try:
        if 'SPACE_ID' not in os.environ:
            logger.info("🔄 Local environment detected - spaces optimization not applicable")
            return True, "Local environment"

        logger.info("🔄 Detected Hugging Face Space - testing optimization...")

        # Ultra-conservative settings: one thread, small batch.
        os.environ['GGUF_N_THREADS'] = '1'
        os.environ['GGUF_N_BATCH'] = '16'

        # Imported only on the Spaces path, after the settings are in place.
        from ai_med_extract.utils.model_loader_gguf import GGUFModelPipeline

        pipeline = GGUFModelPipeline(
            "microsoft/Phi-3-mini-4k-instruct-gguf",
            "Phi-3-mini-4k-instruct-q4.gguf",
        )

        # Quick smoke generation.
        result = pipeline.generate("Test prompt", max_tokens=50)
        logger.info(f"✅ Spaces optimization test passed: {len(result)} characters")
        return True, "Spaces optimization working"
    except Exception as e:
        # Test boundary: report the failure to the caller instead of raising.
        # logger.exception logs at ERROR level and keeps the traceback.
        logger.exception(f"❌ Spaces optimization test failed: {e}")
        return False, str(e)
def main():
    """Run every GGUF test, log a per-test and overall summary.

    Each test function is expected to return a ``(passed, output)`` pair. A
    test that raises (or returns something that cannot be unpacked) is
    recorded as failed together with the time it ran before crashing.

    Returns:
        bool: ``True`` only when every test passed.
    """
    logger.info("🚀 Starting GGUF Hugging Face Spaces Tests...")
    logger.info("=" * 70)

    tests = [
        ("Direct GGUF Loading", test_gguf_model_direct),
        ("GGUF via Model Manager", test_gguf_via_model_manager),
        ("Patient Summarizer GGUF", test_patient_summarizer_gguf),
        ("Spaces Optimization", test_huggingface_spaces_optimization),
    ]

    # Entries are (test_name, passed, duration_in_seconds).
    test_results = []
    for test_name, test_func in tests:
        logger.info(f"\n🧪 Running {test_name} Test...")
        # perf_counter is monotonic, so the measured duration is not affected
        # by wall-clock adjustments. The timer starts before the try so that
        # a crashing test still gets its real duration recorded.
        start_time = time.perf_counter()
        try:
            result, output = test_func()
        except Exception as e:
            elapsed = time.perf_counter() - start_time
            logger.exception(f"❌ {test_name} test crashed: {e}")
            test_results.append((test_name, False, elapsed))
            continue

        elapsed = time.perf_counter() - start_time
        test_results.append((test_name, result, elapsed))
        if result:
            logger.info(f"✅ {test_name} PASSED in {elapsed:.2f}s")
        else:
            logger.warning(f"⚠️ {test_name} FAILED in {elapsed:.2f}s")
            logger.warning(f"Output: {output}")

    # Summary
    logger.info("\n" + "=" * 70)
    logger.info("📊 TEST SUMMARY")
    logger.info("=" * 70)

    total = len(test_results)
    passed = 0
    for test_name, result, duration in test_results:
        status = "✅ PASS" if result else "❌ FAIL"
        logger.info(f"{test_name}: {status} ({duration:.2f}s)")
        if result:
            passed += 1

    logger.info(f"\nOverall: {passed}/{total} tests passed")
    if passed == total:
        logger.info("🎉 All tests passed! GGUF models are working perfectly on Hugging Face Spaces!")
        logger.info("✨ You can now use GGUF models for patient summaries!")
    else:
        logger.warning(f"⚠️ {total - passed} tests failed. Check the logs above for details.")

    # Recommendations for Spaces, tiered by pass rate (>= 80%, >= 60%, lower).
    logger.info("\n💡 RECOMMENDATIONS FOR HUGGING FACE SPACES:")
    if passed >= total * 0.8:
        logger.info("✅ System is ready for production use on Spaces")
        logger.info("✅ GGUF models are optimized for memory constraints")
        logger.info("✅ Patient summaries will work with real GGUF models")
    elif passed >= total * 0.6:
        logger.info("⚠️ System is mostly working but has some issues")
        logger.info("⚠️ GGUF models may need configuration adjustments")
    else:
        logger.error("❌ System has significant issues with GGUF models")
        logger.error("❌ Review and fix failed tests before deployment")

    return passed == total
if __name__ == "__main__":
    # Script entry point: exit status 0 when main() reports that every test
    # passed, 1 otherwise.
    raise SystemExit(0 if main() else 1)