"""Client-side diagnostics and smoke test for the Kickstarter prediction API.

When run as a script this module:

1. checks that huggingface.co and the Longformer model repository are
   reachable from the local machine,
2. queries the API's ``/debug`` endpoint and prints what it reports,
3. posts a sample campaign to ``/predict`` and prints the prediction.
"""

import json
import os
import pprint
import sys
import time

import numpy as np
import requests


def check_internet_connectivity():
    """Return True if a GET to https://huggingface.co answers with HTTP 200.

    Any exception raised while connecting is printed and reported as False.
    """
    print("Testing internet connectivity...")
    try:
        response = requests.get("https://huggingface.co", timeout=5)
    except Exception as e:  # best-effort diagnostic: report, never crash
        print(f"Error connecting to huggingface.co: {str(e)}")
        return False
    print(f"Connection to huggingface.co: Status {response.status_code}")
    return response.status_code == 200


def check_model_repository():
    """Return True if the Longformer model page answers with HTTP 200.

    Any exception raised while connecting is printed and reported as False.
    """
    print("Testing connection to model repository...")
    url = "https://huggingface.co/allenai/longformer-base-4096"
    try:
        response = requests.get(url, timeout=5)
    except Exception as e:  # best-effort diagnostic: report, never crash
        print(f"Error connecting to model repository: {str(e)}")
        return False
    print(f"Connection to model repository: Status {response.status_code}")
    return response.status_code == 200


def check_debug_endpoint(api_url):
    """Fetch and print the diagnostic report from the API's debug endpoint.

    The debug URL is derived from ``api_url`` by replacing ``/predict`` with
    ``/debug``.

    Args:
        api_url: URL of the prediction endpoint.

    Returns:
        The decoded JSON payload on HTTP 200, otherwise None (non-200 status
        or any exception while requesting/decoding/printing).
    """
    # Derive the URL once so the log line and the request cannot disagree.
    debug_url = api_url.replace("/predict", "/debug")
    print(f"Checking debug endpoint at {debug_url}...")
    try:
        response = requests.get(debug_url, timeout=10)
        if response.status_code != 200:
            print(f"Error accessing debug endpoint: Status {response.status_code}")
            print(response.text)
            return None

        debug_info = response.json()
        print("Debug information retrieved:")
        print(f"- API Status: {debug_info.get('api_status', 'Unknown')}")
        print(f"- Model Loaded: {debug_info.get('model_loaded', 'Unknown')}")
        print(f"- Cache Directory Exists: {debug_info.get('model_cache_exists', 'Unknown')}")
        print(f"- Temp Directory Writable: {debug_info.get('tmp_directory_writable', 'Unknown')}")

        # Check internet connectivity from the server
        internet_check = debug_info.get('internet_connectivity', {})
        print(f"- Server Internet Connectivity: {internet_check.get('status', 'Unknown')}")
        if internet_check.get('message'):
            print(f" Message: {internet_check.get('message')}")

        # Check tokenizer test
        tokenizer_test = debug_info.get('tokenizer_test', {})
        print(f"- Tokenizer Test: {tokenizer_test.get('status', 'Unknown')}")
        if tokenizer_test.get('message'):
            print(f" Message: {tokenizer_test.get('message')}")

        # Check disk space (only printed when the server marks it 'ok')
        disk_space = debug_info.get('disk_space', {})
        if disk_space.get('status') == 'ok':
            print(
                f"- Disk Space: Total: {disk_space.get('total_gb', 0):.2f} GB, "
                f"Used: {disk_space.get('used_gb', 0):.2f} GB, "
                f"Free: {disk_space.get('free_gb', 0):.2f} GB "
                f"({disk_space.get('percent_used', 0):.1f}% used)"
            )

        return debug_info
    except Exception as e:  # best-effort diagnostic: report, never crash
        print(f"Exception when accessing debug endpoint: {str(e)}")
        return None


# API endpoint on Hugging Face Spaces
API_URL = "https://angusfung-kickstarter-success-prediction.hf.space/predict"

# Sample input data (similar to what would be in input.json)
campaign_data = {
    "raw_description": (
        "Introducing the AquaGo: The Smart, Eco-Friendly Portable Water Purifier! "
        "Clean water is a basic human right — yet for millions around the world, "
        "it's a daily struggle. Whether you're an outdoor adventurer, traveling "
        "to remote areas, or preparing for emergencies, access to safe drinking "
        "water should never be a compromise. That's why we created **AquaGo**, a "
        "revolutionary portable water purifier that combines cutting-edge "
        "filtration technology, smart sensors, and sustainable materials — all "
        "packed into a sleek, lightweight design you can take anywhere."
    ),
    "raw_blurb": (
        "AquaGo is a smart, eco-friendly portable water purifier that delivers "
        "clean, safe drinking water anywhere."
    ),
    "raw_risks": (
        "Bringing a product to market involves complex engineering, regulatory "
        "approvals, and safety testing. Delays may occur due to certification "
        "or supply chain issues."
    ),
    "raw_subcategory": "Gadgets",
    "raw_category": "Technology",
    "raw_country": "Canada",
    "funding_goal": 2000,
    "image_count": 8,
    "video_count": 3,
    "campaign_duration": 90,
    "previous_projects_count": 5,
    "previous_success_rate": 0.4,
    "previous_pledged": 18745.33,
    "previous_funding_goal": 23564.99,
}


def predict_success(data, max_retries=3, retry_delay=10, api_url=None):
    """Send data to the API and get prediction results with retries.

    A request is retried when it raises an exception, or when the server
    answers HTTP 500 with "Can't load tokenizer" in the body. Any other
    non-200 answer aborts immediately.

    Args:
        data: JSON-serialisable payload posted to the endpoint.
        max_retries: Maximum number of attempts.
        retry_delay: Seconds to wait between attempts.
        api_url: Endpoint to post to; defaults to the module-level API_URL.

    Returns:
        The decoded JSON response on HTTP 200, otherwise None.
    """
    target_url = API_URL if api_url is None else api_url

    for attempt in range(max_retries):
        # Waiting only makes sense when another attempt will follow.
        will_retry = attempt < max_retries - 1
        try:
            # Make the POST request to the API
            print(f"Sending request to: {target_url} (Attempt {attempt + 1}/{max_retries})")
            response = requests.post(target_url, json=data, timeout=60)

            # Check if the request was successful
            if response.status_code == 200:
                return response.json()

            print(f"Error: {response.status_code}")
            print(response.text)

            tokenizer_missing = (
                response.status_code == 500
                and "Can't load tokenizer" in response.text
            )
            if not tokenizer_missing:
                # For other errors, don't retry
                return None

            if will_retry:
                print(f"The model might be downloading. Waiting {retry_delay} seconds before retry...")
                time.sleep(retry_delay)
        except Exception as e:  # any failure of this attempt is retryable
            print(f"Exception occurred: {str(e)}")
            if will_retry:
                print(f"Waiting {retry_delay} seconds before retry...")
                time.sleep(retry_delay)

    return None


def display_results(results):
    """Display the prediction results in a user-friendly way.

    Args:
        results: Response dict read for the keys 'success_probability',
            'predicted_outcome', 'shap_values' and, optionally,
            'longformer_embedding'. A falsy value prints a notice and returns.
    """
    if not results:
        print("No results to display.")
        return

    print("\n===== KICKSTARTER SUCCESS PREDICTION =====\n")
    print(f"Success Probability: {results['success_probability']:.2%}")
    print(f"Predicted Outcome: {results['predicted_outcome']}")

    print("\n----- TOP INFLUENCING FACTORS -----")
    # Get the top 5 factors by absolute magnitude
    top_factors = sorted(
        results['shap_values'].items(),
        key=lambda item: abs(float(item[1])),
        reverse=True,
    )[:5]

    for factor, raw_value in top_factors:
        # Convert once: the sort key already tolerates numeric strings, so the
        # ".4f" formatting must too (it raises ValueError on a str).
        value = float(raw_value)
        impact = "POSITIVE" if value > 0 else "NEGATIVE"
        print(f"{factor}: {value:.4f} ({impact})")

    print("\n----- ALL SHAP VALUES -----")
    pprint.PrettyPrinter(indent=2).pprint(results['shap_values'])

    # Display Longformer embedding information if available
    if 'longformer_embedding' in results:
        embedding = np.array(results['longformer_embedding'])
        print("\n----- LONGFORMER EMBEDDING -----")
        print(f"Embedding Shape: {embedding.shape}")
        print(f"First 10 values: {embedding[:10]}")

        # Calculate some basic statistics on the embedding; an empty or
        # non-numeric embedding makes these raise, which is only reported.
        try:
            print(f"Mean: {np.mean(embedding):.4f}")
            print(f"Std: {np.std(embedding):.4f}")
            print(f"Min: {np.min(embedding):.4f}")
            print(f"Max: {np.max(embedding):.4f}")
        except Exception as e:
            print(f"Error calculating embedding statistics: {str(e)}")


def main():
    """Run the client-side diagnostics, then send one sample prediction."""
    print("==== DIAGNOSTICS ====")
    print("Testing connectivity from client machine...")
    internet_ok = check_internet_connectivity()
    repo_ok = check_model_repository()
    # Called for its printed report; the returned payload is not used here.
    check_debug_endpoint(API_URL)

    print("\n==== PREDICTION TEST ====")
    if not internet_ok:
        print("WARNING: Internet connectivity issues detected on client machine.")
    if not repo_ok:
        print("WARNING: Cannot access model repository from client machine.")

    print("Sending prediction request...")
    results = predict_success(campaign_data, max_retries=2, retry_delay=10)
    display_results(results)


# Main execution
if __name__ == "__main__":
    main()