File size: 9,440 Bytes
8704dff
 
16f55db
 
8704dff
 
 
 
 
16f55db
8704dff
 
 
 
 
16f55db
 
 
 
 
 
 
 
8704dff
 
16f55db
8704dff
16f55db
 
8704dff
 
16f55db
8704dff
16f55db
 
8704dff
16f55db
 
 
8704dff
16f55db
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8704dff
16f55db
 
 
8704dff
16f55db
 
8704dff
16f55db
 
 
 
8704dff
 
16f55db
 
8704dff
16f55db
 
 
 
8704dff
16f55db
 
 
 
 
 
 
 
 
8704dff
16f55db
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8704dff
16f55db
 
 
8704dff
16f55db
 
 
 
8704dff
 
16f55db
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8704dff
 
 
16f55db
 
8704dff
16f55db
8704dff
16f55db
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8704dff
 
16f55db
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8704dff
16f55db
 
 
 
 
 
 
 
 
8704dff
16f55db
 
8704dff
16f55db
8704dff
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
#!/usr/bin/env python3
"""
Test script for GGUF models on Hugging Face Spaces
Specifically tests the patient summary generation with GGUF models
"""

import os
import sys
import logging
import time

# Configure root logging once and create this module's logger
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

# Set environment variables for Hugging Face Spaces in a single update
os.environ.update({
    'HF_HOME': '/tmp/huggingface',
    'GGUF_N_THREADS': '1',  # Single thread for Spaces
    'GGUF_N_BATCH': '16',   # Small batch size for Spaces
})

def test_gguf_model_direct():
    """Load a GGUF model through ``GGUFModelPipeline`` and run one generation.

    Returns:
        tuple: ``(True, output)`` where ``output`` is whatever
        ``pipeline.generate`` returned, or ``(False, error_message)`` when the
        import, the model load, or the generation raises.
    """
    logger.info("🧪 Testing GGUF Model Loading Directly...")

    try:
        # Put the current working directory on sys.path (once) so the project
        # package import resolves relative to where the script is run.
        if '.' not in sys.path:
            sys.path.append('.')
        from ai_med_extract.utils.model_loader_gguf import GGUFModelPipeline

        # Test with the specific model and filename
        model_repo = "microsoft/Phi-3-mini-4k-instruct-gguf"
        filename = "Phi-3-mini-4k-instruct-q4.gguf"

        logger.info(f"Loading GGUF model: {model_repo}/{filename}")

        # Create pipeline directly
        pipeline = GGUFModelPipeline(model_repo, filename)

        # Test generation
        prompt = "Generate a brief medical summary: Patient has fever and cough."
        result = pipeline.generate(prompt, max_tokens=100)

        logger.info(f"✅ Direct GGUF test successful: {len(result)} characters")
        logger.info(f"Sample output: {result[:200]}...")

        return True, result

    except Exception as e:
        # Failure is reported as a value so the caller can keep running the
        # remaining tests; logger.exception preserves the traceback.
        logger.exception(f"❌ Direct GGUF test failed: {e}")
        return False, str(e)

def test_gguf_via_model_manager():
    """Load the GGUF model through the unified model manager and run one generation.

    The loader's reported ``type`` is checked so that a fallback model handed
    back by the manager counts as a failure rather than a pass.

    Returns:
        tuple: ``(True, output)`` when the manager returned a GGUF loader and
        generation succeeded; ``(False, reason)`` when a fallback was returned
        or any step raised.
    """
    logger.info("🧪 Testing GGUF Model via Model Manager...")

    try:
        # Add the current working directory only once; an unconditional append
        # would grow sys.path on every call.
        if '.' not in sys.path:
            sys.path.append('.')
        from ai_med_extract.utils.model_manager import model_manager

        # Get GGUF model loader
        loader = model_manager.get_model_loader(
            "microsoft/Phi-3-mini-4k-instruct-gguf",
            "gguf",
            "Phi-3-mini-4k-instruct-q4.gguf",
        )

        # Check if it's actually using GGUF or fallback
        model_info = loader.get_model_info()
        logger.info(f"Model info: {model_info}")

        if model_info['type'] != 'gguf':
            logger.warning("⚠️ Model manager returned fallback instead of GGUF")
            return False, "Fallback model used"

        logger.info("✅ GGUF model loaded successfully via manager")

        # Test generation
        prompt = "Generate a brief medical summary: Patient has chest pain."
        result = loader.generate(prompt, max_tokens=100)

        logger.info(f"✅ GGUF generation via manager: {len(result)} characters")
        logger.info(f"Sample output: {result[:200]}...")

        return True, result

    except Exception as e:
        # Report the failure as a value; logger.exception keeps the traceback.
        logger.exception(f"❌ GGUF via model manager test failed: {e}")
        return False, str(e)

def test_patient_summarizer_gguf():
    """Generate a clinical summary for a sample patient using a GGUF-backed agent.

    Builds a ``PatientSummarizerAgent`` for the Phi-3 mini GGUF model, feeds it
    a hard-coded single-encounter patient record, and logs a preview of the
    summary it produces.

    Returns:
        tuple: ``(True, summary)`` on success, or ``(False, error_message)``
        when the import, agent construction, or summary generation raises.
    """
    logger.info("🧪 Testing Patient Summarizer with GGUF Model...")

    try:
        # Add the current working directory only once; an unconditional append
        # would grow sys.path on every call.
        if '.' not in sys.path:
            sys.path.append('.')
        from ai_med_extract.agents.patient_summary_agent import PatientSummarizerAgent

        # Create agent with GGUF model
        agent = PatientSummarizerAgent(
            "microsoft/Phi-3-mini-4k-instruct-gguf",
            "gguf",
            "Phi-3-mini-4k-instruct-q4.gguf",
        )

        # Sample patient data
        sample_data = {
            "result": {
                "patientname": "John Doe",
                "patientnumber": "12345",
                "agey": "45",
                "gender": "Male",
                "allergies": ["Penicillin"],
                "social_history": "Non-smoker, occasional alcohol",
                "past_medical_history": ["Hypertension", "Diabetes"],
                "encounters": [
                    {
                        "visit_date": "2024-01-15",
                        "chief_complaint": "Chest pain",
                        "symptoms": "Sharp chest pain, shortness of breath",
                        "diagnosis": ["Angina", "Hypertension"],
                        "dr_notes": "Patient reports chest pain for 2 days",
                        "vitals": {"BP": "140/90", "HR": "85", "SpO2": "98%"},
                        "medications": ["Aspirin", "Metoprolol"],
                        "treatment": "Prescribed medications, follow-up in 1 week",
                    }
                ],
            }
        }

        # Generate clinical summary
        logger.info("Generating clinical summary...")
        summary = agent.generate_clinical_summary(sample_data)

        logger.info(f"✅ Patient summary generated: {len(summary)} characters")
        logger.info(f"Summary preview: {summary[:300]}...")

        return True, summary

    except Exception as e:
        # Report the failure as a value; logger.exception keeps the traceback.
        logger.exception(f"❌ Patient summarizer GGUF test failed: {e}")
        return False, str(e)

def test_huggingface_spaces_optimization():
    """Exercise GGUF loading under the conservative settings used on Spaces.

    The check only runs when the ``SPACE_ID`` environment variable is present;
    otherwise the test is reported as passed without loading a model.

    Side effects:
        When ``SPACE_ID`` is set, ``GGUF_N_THREADS`` and ``GGUF_N_BATCH`` are
        written to ``os.environ`` and stay set after the call.

    Returns:
        tuple: ``(True, "Local environment")`` outside a Space,
        ``(True, "Spaces optimization working")`` when load and generation
        succeed, or ``(False, error_message)`` when they raise.
    """
    logger.info("🧪 Testing Hugging Face Spaces Optimization...")

    # Check if we're in a Hugging Face Space
    if 'SPACE_ID' not in os.environ:
        logger.info("🔄 Local environment detected - spaces optimization not applicable")
        return True, "Local environment"

    logger.info("🔄 Detected Hugging Face Space - testing optimization...")

    try:
        # Test with ultra-conservative settings
        os.environ['GGUF_N_THREADS'] = '1'
        os.environ['GGUF_N_BATCH'] = '16'

        # Put the current working directory on sys.path (once) so the project
        # package import resolves relative to where the script is run.
        if '.' not in sys.path:
            sys.path.append('.')

        # Test model loading with optimized settings
        from ai_med_extract.utils.model_loader_gguf import GGUFModelPipeline

        pipeline = GGUFModelPipeline(
            "microsoft/Phi-3-mini-4k-instruct-gguf",
            "Phi-3-mini-4k-instruct-q4.gguf",
        )

        # Quick test
        result = pipeline.generate("Test prompt", max_tokens=50)

        logger.info(f"✅ Spaces optimization test passed: {len(result)} characters")
        return True, "Spaces optimization working"

    except Exception as e:
        # Report the failure as a value; logger.exception keeps the traceback.
        logger.exception(f"❌ Spaces optimization test failed: {e}")
        return False, str(e)

def main():
    """Run every GGUF test, log per-test and overall results, and report success.

    Each test callable is expected to return a ``(passed, output)`` pair. A
    test that raises, or whose return value cannot be unpacked into two items,
    is recorded as failed together with the time it actually ran.

    Returns:
        bool: ``True`` only when every test passed.
    """
    logger.info("🚀 Starting GGUF Hugging Face Spaces Tests...")
    logger.info("=" * 70)

    # Run all tests
    tests = [
        ("Direct GGUF Loading", test_gguf_model_direct),
        ("GGUF via Model Manager", test_gguf_via_model_manager),
        ("Patient Summarizer GGUF", test_patient_summarizer_gguf),
        ("Spaces Optimization", test_huggingface_spaces_optimization),
    ]

    test_results = []

    for test_name, test_func in tests:
        logger.info(f"\n🧪 Running {test_name} Test...")
        # perf_counter is monotonic, so the measured duration is not affected
        # by wall-clock adjustments while a (possibly slow) model test runs.
        start_time = time.perf_counter()
        try:
            result, output = test_func()
        except Exception as e:
            # Record the real elapsed time instead of 0 so the summary shows
            # how long the test ran before crashing.
            elapsed = time.perf_counter() - start_time
            logger.exception(f"❌ {test_name} test crashed: {e}")
            test_results.append((test_name, False, elapsed))
            continue

        elapsed = time.perf_counter() - start_time
        test_results.append((test_name, result, elapsed))

        if result:
            logger.info(f"✅ {test_name} PASSED in {elapsed:.2f}s")
        else:
            logger.warning(f"⚠️ {test_name} FAILED in {elapsed:.2f}s")
            logger.warning(f"Output: {output}")

    # Summary
    logger.info("\n" + "=" * 70)
    logger.info("📊 TEST SUMMARY")
    logger.info("=" * 70)

    for test_name, result, duration in test_results:
        status = "✅ PASS" if result else "❌ FAIL"
        logger.info(f"{test_name}: {status} ({duration:.2f}s)")

    total = len(test_results)
    passed = sum(1 for _, result, _ in test_results if result)

    logger.info(f"\nOverall: {passed}/{total} tests passed")

    if passed == total:
        logger.info("🎉 All tests passed! GGUF models are working perfectly on Hugging Face Spaces!")
        logger.info("✨ You can now use GGUF models for patient summaries!")
    else:
        logger.warning(f"⚠️ {total - passed} tests failed. Check the logs above for details.")

    # Recommendations for Spaces, tiered by the fraction of tests that passed
    logger.info("\n💡 RECOMMENDATIONS FOR HUGGING FACE SPACES:")
    if passed >= total * 0.8:
        logger.info("✅ System is ready for production use on Spaces")
        logger.info("✅ GGUF models are optimized for memory constraints")
        logger.info("✅ Patient summaries will work with real GGUF models")
    elif passed >= total * 0.6:
        logger.info("⚠️ System is mostly working but has some issues")
        logger.info("⚠️ GGUF models may need configuration adjustments")
    else:
        logger.error("❌ System has significant issues with GGUF models")
        logger.error("❌ Review and fix failed tests before deployment")

    return passed == total

if __name__ == "__main__":
    # Exit status mirrors the overall result: 0 when main() reports success, 1 otherwise.
    sys.exit(0 if main() else 1)