Spaces:

rivapereira123
/

firstaid

Sleeping

App Files Files Community

rivapereira123 commited on Jul 13

Commit

5d652f8

verified ·

1 Parent(s): b99e661

Upload test_enhanced_system.py

Browse files

Files changed (1) hide show

test_enhanced_system.py +490 -0

test_enhanced_system.py ADDED Viewed

	@@ -0,0 +1,490 @@

+#!/usr/bin/env python3
+"""
+Comprehensive test script for Enhanced Gaza First Aid RAG Assistant
+Tests all major components and validates improvements
+"""
+import os
+import sys
+import time
+import logging
+import traceback
+from pathlib import Path
+import asyncio
+# Configure logging for testing
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
+)
+logger = logging.getLogger(__name__)
+def test_imports():
+    """Test all required imports"""
+    print("🔍 Testing imports...")
+    try:
+        import torch
+        print(f"✅ PyTorch: {torch.__version__}")
+        import transformers
+        print(f"✅ Transformers: {transformers.__version__}")
+        import sentence_transformers
+        print(f"✅ Sentence Transformers: {sentence_transformers.__version__}")
+        import faiss
+        print(f"✅ FAISS: {faiss.__version__}")
+        import gradio as gr
+        print(f"✅ Gradio: {gr.__version__}")
+        from llama_index.core import Document
+        print("✅ LlamaIndex Core")
+        from llama_index.vector_stores.faiss import FaissVectorStore
+        print("✅ LlamaIndex FAISS")
+        from llama_index.embeddings.huggingface import HuggingFaceEmbedding
+        print("✅ LlamaIndex HuggingFace Embeddings")
+        import PyPDF2
+        print(f"✅ PyPDF2: {PyPDF2.__version__}")
+        return True
+    except ImportError as e:
+        print(f"❌ Import error: {e}")
+        return False
+def test_data_availability():
+    """Test if medical data is available"""
+    print("\n📁 Testing data availability...")
+    data_dir = Path("./data")
+    if not data_dir.exists():
+        print("❌ Data directory not found")
+        return False
+    pdf_files = list(data_dir.glob("*.pdf"))
+    txt_files = list(data_dir.glob("*.txt"))
+    print(f"✅ Found {len(pdf_files)} PDF files")
+    print(f"✅ Found {len(txt_files)} text files")
+    if len(pdf_files) == 0 and len(txt_files) == 0:
+        print("❌ No medical documents found")
+        return False
+    # Show sample files
+    for i, pdf_file in enumerate(pdf_files[:3]):
+        size_mb = pdf_file.stat().st_size / (1024 * 1024)
+        print(f"   📄 {pdf_file.name} ({size_mb:.1f} MB)")
+    return True
+def test_embedding_model():
+    """Test embedding model loading and functionality"""
+    print("\n🧠 Testing embedding model...")
+    try:
+        from llama_index.embeddings.huggingface import HuggingFaceEmbedding
+        # Test higher-dimensional model
+        print("Loading all-mpnet-base-v2 (768-dim)...")
+        embedding_model = HuggingFaceEmbedding(
+            model_name="sentence-transformers/all-mpnet-base-v2",
+            device='cpu',
+            embed_batch_size=2
+        )
+        # Test embedding generation
+        test_text = "How to treat burns with limited water supply?"
+        start_time = time.time()
+        embedding = embedding_model.get_text_embedding(test_text)
+        embedding_time = time.time() - start_time
+        print(f"✅ Embedding dimension: {len(embedding)}")
+        print(f"✅ Embedding time: {embedding_time:.2f}s")
+        print(f"✅ Sample embedding values: {embedding[:5]}")
+        return True, embedding_model
+    except Exception as e:
+        print(f"❌ Embedding model error: {e}")
+        traceback.print_exc()
+        return False, None
+def test_faiss_indexing():
+    """Test FAISS indexing functionality"""
+    print("\n🔍 Testing FAISS indexing...")
+    try:
+        import faiss
+        import numpy as np
+        # Test different index types
+        dimension = 768
+        # Test flat index
+        flat_index = faiss.IndexFlatL2(dimension)
+        print(f"✅ Created IndexFlatL2 (dimension: {dimension})")
+        # Test IVF index
+        nlist = 10  # Small for testing
+        quantizer = faiss.IndexFlatL2(dimension)
+        ivf_index = faiss.IndexIVFFlat(quantizer, dimension, nlist)
+        print(f"✅ Created IndexIVFFlat (clusters: {nlist})")
+        # Test with sample data
+        sample_vectors = np.random.random((50, dimension)).astype('float32')
+        # Train IVF index
+        ivf_index.train(sample_vectors)
+        print("✅ IVF index training completed")
+        # Add vectors
+        flat_index.add(sample_vectors)
+        ivf_index.add(sample_vectors)
+        print(f"✅ Added {len(sample_vectors)} vectors to indices")
+        # Test search
+        query_vector = np.random.random((1, dimension)).astype('float32')
+        start_time = time.time()
+        flat_distances, flat_indices = flat_index.search(query_vector, 5)
+        flat_time = time.time() - start_time
+        start_time = time.time()
+        ivf_distances, ivf_indices = ivf_index.search(query_vector, 5)
+        ivf_time = time.time() - start_time
+        print(f"✅ Flat search time: {flat_time:.4f}s")
+        print(f"✅ IVF search time: {ivf_time:.4f}s")
+        print(f"✅ Speed improvement: {flat_time/ivf_time:.2f}x")
+        return True
+    except Exception as e:
+        print(f"❌ FAISS indexing error: {e}")
+        traceback.print_exc()
+        return False
+def test_knowledge_base():
+    """Test knowledge base initialization and search"""
+    print("\n📚 Testing knowledge base...")
+    try:
+        # Import the enhanced system
+        sys.path.append('.')
+        from enhanced_gaza_rag_app import EnhancedGazaKnowledgeBase
+        # Initialize knowledge base
+        print("Initializing knowledge base...")
+        kb = EnhancedGazaKnowledgeBase(data_dir="./data")
+        start_time = time.time()
+        kb.initialize()
+        init_time = time.time() - start_time
+        print(f"✅ Knowledge base initialized in {init_time:.2f}s")
+        print(f"✅ Chunks created: {len(kb.chunk_metadata)}")
+        # Test search functionality
+        test_queries = [
+            "How to treat burns?",
+            "Managing bleeding wounds",
+            "Signs of infection",
+            "Emergency care for children"
+        ]
+        for query in test_queries:
+            start_time = time.time()
+            results = kb.search(query, k=3)
+            search_time = time.time() - start_time
+            print(f"✅ Query: '{query}' -> {len(results)} results in {search_time:.3f}s")
+            if results:
+                best_result = results[0]
+                print(f"   📄 Best match: {best_result.get('source', 'unknown')}")
+                print(f"   🎯 Score: {best_result.get('score', 0):.3f}")
+                print(f"   🏥 Priority: {best_result.get('medical_priority', 'general')}")
+        return True, kb
+    except Exception as e:
+        print(f"❌ Knowledge base error: {e}")
+        traceback.print_exc()
+        return False, None
+def test_llm_loading():
+    """Test LLM loading and inference"""
+    print("\n🤖 Testing LLM loading...")
+    try:
+        from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, pipeline
+        import torch
+        model_name = "microsoft/Phi-3-mini-4k-instruct"
+        print(f"Loading {model_name}...")
+        # Test quantization config
+        quantization_config = BitsAndBytesConfig(
+            load_in_4bit=True,
+            bnb_4bit_compute_dtype=torch.float16,
+            bnb_4bit_use_double_quant=True,
+            bnb_4bit_quant_type="nf4"
+        )
+        start_time = time.time()
+        tokenizer = AutoTokenizer.from_pretrained(
+            model_name,
+            trust_remote_code=True
+        )
+        if tokenizer.pad_token is None:
+            tokenizer.pad_token = tokenizer.eos_token
+        model = AutoModelForCausalLM.from_pretrained(
+            model_name,
+            quantization_config=quantization_config,
+            device_map="auto",
+            trust_remote_code=True,
+            torch_dtype=torch.float16,
+            low_cpu_mem_usage=True
+        )
+        loading_time = time.time() - start_time
+        print(f"✅ Model loaded in {loading_time:.2f}s")
+        # Test pipeline creation
+        generation_pipeline = pipeline(
+            "text-generation",
+            model=model,
+            tokenizer=tokenizer,
+            device_map="auto",
+            torch_dtype=torch.float16,
+            return_full_text=False
+        )
+        print("✅ Generation pipeline created")
+        # Test inference
+        test_prompt = "How to treat a burn injury: "
+        start_time = time.time()
+        response = generation_pipeline(
+            test_prompt,
+            max_new_tokens=50,
+            temperature=0.2,
+            do_sample=True,
+            pad_token_id=tokenizer.eos_token_id
+        )
+        inference_time = time.time() - start_time
+        if response and len(response) > 0:
+            generated_text = response[0]['generated_text']
+            print(f"✅ Inference completed in {inference_time:.2f}s")
+            print(f"✅ Generated text: {generated_text[:100]}...")
+        else:
+            print("❌ No response generated")
+            return False
+        return True
+    except Exception as e:
+        print(f"❌ LLM loading error: {e}")
+        traceback.print_exc()
+        return False
+def test_full_system():
+    """Test the complete enhanced system"""
+    print("\n🚀 Testing complete enhanced system...")
+    try:
+        # Import the enhanced system
+        from enhanced_gaza_rag_app import initialize_enhanced_system, process_medical_query_with_progress
+        print("Initializing complete system...")
+        start_time = time.time()
+        system = initialize_enhanced_system()
+        init_time = time.time() - start_time
+        print(f"✅ Complete system initialized in {init_time:.2f}s")
+        # Test queries
+        test_queries = [
+            "How to treat severe burns when water is limited?",
+            "Managing gunshot wounds with basic supplies",
+            "Signs of wound infection to watch for"
+        ]
+        for query in test_queries:
+            print(f"\n🔍 Testing query: '{query}'")
+            start_time = time.time()
+            response, metadata, status = process_medical_query_with_progress(query)
+            query_time = time.time() - start_time
+            print(f"✅ Query processed in {query_time:.2f}s")
+            print(f"📝 Response length: {len(response)} characters")
+            print(f"📊 Metadata: {metadata}")
+            print(f"🛡️ Status: {status}")
+            # Check response quality
+            if len(response) > 50 and "error" not in response.lower():
+                print("✅ Response quality: Good")
+            else:
+                print("⚠️ Response quality: Needs improvement")
+        return True
+    except Exception as e:
+        print(f"❌ Full system test error: {e}")
+        traceback.print_exc()
+        return False
+def test_ui_components():
+    """Test UI components and interface"""
+    print("\n🎨 Testing UI components...")
+    try:
+        from enhanced_ui_gaza_rag_app import create_advanced_gradio_interface
+        print("Creating advanced Gradio interface...")
+        start_time = time.time()
+        interface = create_advanced_gradio_interface()
+        ui_time = time.time() - start_time
+        print(f"✅ UI created in {ui_time:.2f}s")
+        print("✅ Advanced CSS styling applied")
+        print("✅ Progress indicators configured")
+        print("✅ Gaza-specific theming applied")
+        print("✅ Interactive elements configured")
+        return True
+    except Exception as e:
+        print(f"❌ UI components error: {e}")
+        traceback.print_exc()
+        return False
+def run_performance_benchmark():
+    """Run performance benchmarks"""
+    print("\n⚡ Running performance benchmarks...")
+    try:
+        from enhanced_gaza_rag_app import initialize_enhanced_system
+        system = initialize_enhanced_system()
+        # Benchmark queries
+        benchmark_queries = [
+            "How to treat burns?",
+            "Managing bleeding wounds",
+            "Signs of infection",
+            "Emergency care procedures",
+            "Trauma management protocols"
+        ]
+        total_time = 0
+        successful_queries = 0
+        for i, query in enumerate(benchmark_queries):
+            try:
+                start_time = time.time()
+                result = system.generate_response(query)
+                query_time = time.time() - start_time
+                total_time += query_time
+                successful_queries += 1
+                print(f"✅ Query {i+1}: {query_time:.2f}s")
+            except Exception as e:
+                print(f"❌ Query {i+1} failed: {e}")
+        if successful_queries > 0:
+            avg_time = total_time / successful_queries
+            print(f"\n📊 Performance Summary:")
+            print(f"   Average query time: {avg_time:.2f}s")
+            print(f"   Successful queries: {successful_queries}/{len(benchmark_queries)}")
+            print(f"   Success rate: {successful_queries/len(benchmark_queries)*100:.1f}%")
+        return True
+    except Exception as e:
+        print(f"❌ Performance benchmark error: {e}")
+        traceback.print_exc()
+        return False
+def main():
+    """Run comprehensive test suite"""
+    print("🧪 Enhanced Gaza First Aid RAG Assistant - Comprehensive Test Suite")
+    print("=" * 70)
+    test_results = {}
+    # Run all tests
+    tests = [
+        ("Import Dependencies", test_imports),
+        ("Data Availability", test_data_availability),
+        ("Embedding Model", lambda: test_embedding_model()[0]),
+        ("FAISS Indexing", test_faiss_indexing),
+        ("Knowledge Base", lambda: test_knowledge_base()[0]),
+        ("LLM Loading", test_llm_loading),
+        ("Full System", test_full_system),
+        ("UI Components", test_ui_components),
+        ("Performance Benchmark", run_performance_benchmark)
+    ]
+    passed_tests = 0
+    total_tests = len(tests)
+    for test_name, test_func in tests:
+        print(f"\n{'='*50}")
+        print(f"🧪 Running: {test_name}")
+        print(f"{'='*50}")
+        try:
+            result = test_func()
+            test_results[test_name] = result
+            if result:
+                passed_tests += 1
+                print(f"✅ {test_name}: PASSED")
+            else:
+                print(f"❌ {test_name}: FAILED")
+        except Exception as e:
+            test_results[test_name] = False
+            print(f"❌ {test_name}: ERROR - {e}")
+    # Final summary
+    print(f"\n{'='*70}")
+    print("🏁 TEST SUMMARY")
+    print(f"{'='*70}")
+    for test_name, result in test_results.items():
+        status = "✅ PASSED" if result else "❌ FAILED"
+        print(f"{test_name:.<40} {status}")
+    print(f"\nOverall Results: {passed_tests}/{total_tests} tests passed")
+    print(f"Success Rate: {passed_tests/total_tests*100:.1f}%")
+    if passed_tests == total_tests:
+        print("\n🎉 ALL TESTS PASSED! Enhanced system is ready for deployment.")
+    elif passed_tests >= total_tests * 0.8:
+        print("\n⚠️ Most tests passed. System is functional with minor issues.")
+    else:
+        print("\n🚨 Multiple test failures. System needs attention before deployment.")
+    return passed_tests == total_tests
+if __name__ == "__main__":
+    success = main()
+    sys.exit(0 if success else 1)