from flask import Flask, request, jsonify, render_template_string
import os
import requests
import json
import logging
from typing import Dict, Any, List
import time

app = Flask(__name__)
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# Configuration
OLLAMA_API_URL = os.getenv('OLLAMA_API_URL', 'https://huggingface.co/spaces/tommytracx/ollama-api')
# Comma-separated fallback list, used until a live model list can be fetched.
DEFAULT_MODELS = os.getenv('DEFAULT_MODEL', 'llama2,llama2:13b,llama2:70b,codellama,neural-chat,gemma-3-270m').split(',')
MAX_TOKENS = int(os.getenv('MAX_TOKENS', '2048'))
TEMPERATURE = float(os.getenv('TEMPERATURE', '0.7'))


class OllamaClient:
    def __init__(self, api_url: str):
        self.api_url = api_url.rstrip('/')
        self.available_models = DEFAULT_MODELS  # Initialize with default models
        self.refresh_models()

    def refresh_models(self) -> None:
        """Refresh the list of available models from the API, falling back to defaults on failure."""
        try:
            response = requests.get(f"{self.api_url}/api/models", timeout=10)
            response.raise_for_status()
            data = response.json()
            if data.get('status') == 'success' and isinstance(data.get('models'), list):
                self.available_models = data['models']
                logging.info(f"Successfully fetched models: {self.available_models}")
            else:
                logging.warning(f"Invalid response format from API: {data}")
                self.available_models = DEFAULT_MODELS
        except Exception as e:
            logging.error(f"Error refreshing models: {e}")
            self.available_models = DEFAULT_MODELS

    def list_models(self) -> List[str]:
        """Return the list of available models."""
        return self.available_models

    def generate(self, model_name: str, prompt: str, **kwargs) -> Dict[str, Any]:
        """Generate text using a model."""
        if model_name not in self.available_models:
            return {"status": "error", "message": f"Model {model_name} not available"}
        try:
            payload = {
                "model": model_name,
                "prompt": prompt,
                "stream": False,
                **kwargs
            }
            response = requests.post(f"{self.api_url}/api/generate", json=payload, timeout=120)
            response.raise_for_status()
            data = response.json()
            if data.get('status') == 'success':
                return {
                    "status": "success",
                    "response": data.get('response', ''),
                    "model": model_name,
                    "usage": data.get('usage', {})
                }
            return {"status": "error", "message": data.get('message', 'Unknown error')}
        except Exception as e:
            logging.error(f"Error generating response: {e}")
            return {"status": "error", "message": str(e)}

    def health_check(self) -> Dict[str, Any]:
        """Check the health of the Ollama API."""
        try:
            response = requests.get(f"{self.api_url}/health", timeout=10)
            response.raise_for_status()
            return response.json()
        except Exception as e:
            logging.error(f"Health check failed: {e}")
            return {"status": "unhealthy", "error": str(e)}


# Initialize Ollama client
ollama_client = OllamaClient(OLLAMA_API_URL)

# HTML template for the chat interface
HTML_TEMPLATE = '''<!DOCTYPE html>
<html>
<head>
    <meta charset="utf-8">
    <title>OpenWebUI - Ollama Chat</title>
</head>
<body>
    <h1>🤖 OpenWebUI</h1>
    <p>Chat with your local Ollama models through Hugging Face Spaces</p>

    <div>
        <label for="model">Model:</label>
        <select id="model">
            {% for model in default_models %}
            <option value="{{ model }}">{{ model }}</option>
            {% endfor %}
        </select>
        <label for="temperature">Temperature:</label>
        <input type="range" id="temperature" min="0" max="1" step="0.1" value="0.7"
               oninput="document.getElementById('temp-value').textContent = this.value">
        <span id="temp-value">0.7</span>
    </div>

    <div id="chat">
        <div class="message"><strong>AI</strong>
            Hello! I'm your AI assistant powered by Ollama. How can I help you today?
        </div>
    </div>
    <div id="thinking" style="display:none">AI is thinking...</div>

    <form id="chat-form">
        <input type="text" id="message" placeholder="Type a message..." required>
        <button type="submit">Send</button>
    </form>

    <footer>Connected to: {{ ollama_api_url }}</footer>

    <script>
        // Post the user's message to the backend and append the reply to the chat log.
        document.getElementById('chat-form').addEventListener('submit', async (event) => {
            event.preventDefault();
            const input = document.getElementById('message');
            const text = input.value.trim();
            if (!text) return;
            appendMessage('You', text);
            input.value = '';
            document.getElementById('thinking').style.display = 'block';
            try {
                const response = await fetch('/api/chat', {
                    method: 'POST',
                    headers: {'Content-Type': 'application/json'},
                    body: JSON.stringify({
                        message: text,
                        model: document.getElementById('model').value,
                        temperature: parseFloat(document.getElementById('temperature').value)
                    })
                });
                const data = await response.json();
                appendMessage('AI', data.status === 'success' ? data.response : 'Error: ' + data.message);
            } catch (err) {
                appendMessage('AI', 'Error: ' + err);
            } finally {
                document.getElementById('thinking').style.display = 'none';
            }
        });

        function appendMessage(sender, text) {
            const div = document.createElement('div');
            div.className = 'message';
            div.innerHTML = '<strong>' + sender + '</strong> ';
            div.appendChild(document.createTextNode(text));
            document.getElementById('chat').appendChild(div);
        }
    </script>
</body>
</html>
'''


@app.route('/')
def home():
    """Main chat interface."""
    return render_template_string(HTML_TEMPLATE,
                                  ollama_api_url=OLLAMA_API_URL,
                                  default_models=DEFAULT_MODELS)


@app.route('/api/chat', methods=['POST'])
def chat():
    """Chat API endpoint."""
    try:
        data = request.get_json()
        if not data or 'message' not in data or 'model' not in data:
            return jsonify({"status": "error", "message": "Message and model are required"}), 400

        message = data['message']
        model = data['model']
        temperature = data.get('temperature', TEMPERATURE)
        max_tokens = data.get('max_tokens', MAX_TOKENS)

        result = ollama_client.generate(model, message, temperature=temperature, max_tokens=max_tokens)
        return jsonify(result), (200 if result["status"] == "success" else 500)
    except Exception as e:
        logging.error(f"Chat endpoint error: {e}")
        return jsonify({"status": "error", "message": str(e)}), 500


@app.route('/api/models', methods=['GET'])
def get_models():
    """Get available models."""
    try:
        models = ollama_client.list_models()
        return jsonify({
            "status": "success",
            "models": models,
            "count": len(models)
        })
    except Exception as e:
        logging.error(f"Models endpoint error: {e}")
        return jsonify({"status": "error", "message": str(e)}), 500


@app.route('/health', methods=['GET'])
def health_check():
    """Health check endpoint."""
    try:
        ollama_health = ollama_client.health_check()
        return jsonify({
            "status": "healthy",
            "ollama_api": ollama_health,
            "timestamp": time.time()
        })
    except Exception as e:
        logging.error(f"Health check endpoint error: {e}")
        return jsonify({
            "status": "unhealthy",
            "error": str(e),
            "timestamp": time.time()
        }), 500


if __name__ == '__main__':
    app.run(host='0.0.0.0', port=7860, debug=False)
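
# Example requests against the endpoints defined above (a sketch; assumes the app
# is running locally on its default port 7860, and that "llama2" is among the
# available models):
#
#   curl http://localhost:7860/health
#   curl http://localhost:7860/api/models
#   curl -X POST http://localhost:7860/api/chat \
#        -H 'Content-Type: application/json' \
#        -d '{"message": "Hello!", "model": "llama2", "temperature": 0.7}'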