#!/usr/bin/env python3
"""
Test script for debugging Hugging Face Spaces deployment issues
"""
import logging
import time

import requests

# Set up logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)


def test_basic_connectivity():
    """Test basic internet connectivity"""
    logger.info("Testing basic connectivity...")
    test_urls = [
        "https://ollama.com",
        "https://huggingface.co",
        "https://google.com",
    ]
    for url in test_urls:
        try:
            logger.info(f"Testing connection to {url}")
            response = requests.get(url, timeout=10)
            logger.info(f"✅ {url} - Status: {response.status_code}")
        except Exception as e:
            logger.error(f"❌ {url} - Error: {e}")


def test_ollama_scraping():
    """Test Ollama website scraping"""
    logger.info("Testing Ollama scraping...")
    try:
        # Test basic page fetch
        url = "https://ollama.com/library/llama2"
        logger.info(f"Fetching {url}")
        session = requests.Session()
        session.headers.update({
            "User-Agent": "Turkish-MMLU-Benchmark/1.0",
            "Accept": "text/html",
        })
        # requests.Session does not honor a `timeout` attribute; the timeout
        # has to be passed to the request call itself.
        response = session.get(url, timeout=30)
        logger.info(f"✅ Ollama page fetch - Status: {response.status_code}")
        logger.info(f"Content length: {len(response.text)} characters")
        # Check if content contains expected elements
        if "llama2" in response.text.lower():
            logger.info("✅ Content contains expected model name")
        else:
            logger.warning("⚠️ Content doesn't contain expected model name")
    except Exception as e:
        logger.error(f"❌ Ollama scraping test failed: {e}")


def test_model_validator():
    """Test the model validator"""
    logger.info("Testing model validator...")
    try:
        from model_validator import OllamaModelValidator

        validator = OllamaModelValidator()
        # Test with a known model
        result = validator.validate_ollama_model("llama2", "7b")
        logger.info(f"✅ Model validation result: {result.get('valid', False)}")
        if result.get('valid'):
            logger.info(f"Parameter size: {result.get('parameter_size')}")
            logger.info(f"RAM requirements: {result.get('ram_requirements')}")
        else:
            logger.info(f"Validation error: {result.get('error')}")
    except Exception as e:
        logger.error(f"❌ Model validator test failed: {e}")


def test_data_manager():
    """Test the data manager"""
    logger.info("Testing data manager...")
    try:
        from data_manager import DataManager

        dm = DataManager()
        # Test leaderboard data
        leaderboard = dm.leaderboard_data
        logger.info(f"✅ Leaderboard data loaded - {len(leaderboard)} rows")
        # Test model existence check
        result = dm.check_model_exists("llama2", "7b")
        logger.info(f"✅ Model existence check: {result}")
    except Exception as e:
        logger.error(f"❌ Data manager test failed: {e}")


if __name__ == "__main__":
    logger.info("Starting Hugging Face Spaces compatibility tests...")
    test_basic_connectivity()
    time.sleep(1)
    test_ollama_scraping()
    time.sleep(1)
    test_model_validator()
    time.sleep(1)
    test_data_manager()
    logger.info("All tests completed!")