# tommytracx's picture
# Update app.py
# 170a0e7 verified
# raw
# history blame
# 19.2 kB
# app.py
from flask import Flask, request, jsonify, render_template_string
import os
import requests
import json
import logging
from typing import Dict, Any, List
import time
# Flask application object; all routes below are registered on it.
app = Flask(__name__)
# Process-wide logging: timestamped, INFO level.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
# Configuration (each value overridable via environment variable).
# NOTE(review): the default OLLAMA_BASE_URL points at a Hugging Face Space
# page URL — confirm that host actually serves the Ollama /api/* routes.
OLLAMA_BASE_URL = os.getenv('OLLAMA_BASE_URL', 'https://huggingface.co/spaces/tommytracx/ollama-api')
# Comma-separated whitelist of model names clients are allowed to request.
ALLOWED_MODELS = os.getenv('ALLOWED_MODELS', 'llama2,llama2:13b,llama2:70b,codellama,neural-chat,gemma-3-270m').split(',')
# Generation defaults applied when a /api/chat request omits them.
MAX_TOKENS = int(os.getenv('MAX_TOKENS', '2048'))
TEMPERATURE = float(os.getenv('TEMPERATURE', '0.7'))
class OllamaManager:
    """Thin HTTP client for a remote Ollama API.

    Maintains a cached list of usable models (always restricted to the
    module-level ALLOWED_MODELS whitelist) and exposes the generate/health
    helpers consumed by the Flask routes below.
    """

    def __init__(self, base_url: str):
        """Store the API base URL and eagerly populate the model cache.

        NOTE: this issues a network request (via refresh_models) at
        construction time.
        """
        self.base_url = base_url.rstrip('/')
        # Copy the whitelist so later reassignment/mutation of the cache can
        # never alias the module-level ALLOWED_MODELS list.
        self.available_models: List[str] = list(ALLOWED_MODELS)
        self.refresh_models()

    def refresh_models(self) -> None:
        """Refresh the list of available models from Ollama API, falling back to allowed models."""
        try:
            response = requests.get(f"{self.base_url}/api/tags", timeout=10)
            response.raise_for_status()
            data = response.json()
            models = [model['name'] for model in data.get('models', [])]
            # Filter models to only include those in ALLOWED_MODELS.
            self.available_models = [model for model in models if model in ALLOWED_MODELS]
            if not self.available_models:
                self.available_models = list(ALLOWED_MODELS)
                logging.warning("No allowed models found in API response, using ALLOWED_MODELS")
            logging.info(f"Available models: {self.available_models}")
        except Exception as e:
            # Network/parse failure: degrade gracefully to the static whitelist.
            logging.error(f"Error refreshing models: {e}")
            self.available_models = list(ALLOWED_MODELS)

    def list_models(self) -> List[str]:
        """Return the list of available models."""
        return self.available_models

    def generate(self, model_name: str, prompt: str, **kwargs) -> Dict[str, Any]:
        """Generate text using a model.

        Sampling parameters (temperature, max_tokens, ...) may be passed as
        keyword arguments. They are folded into the Ollama "options" object,
        which is where /api/generate actually reads them — top-level
        temperature/max_tokens keys are ignored by Ollama. max_tokens is
        translated to Ollama's num_predict.
        """
        if model_name not in self.available_models:
            return {"status": "error", "message": f"Model {model_name} not available"}
        try:
            # Merge any caller-supplied options with the mapped sampling params.
            options: Dict[str, Any] = dict(kwargs.pop("options", None) or {})
            if "max_tokens" in kwargs:
                options["num_predict"] = kwargs.pop("max_tokens")
            if "temperature" in kwargs:
                options["temperature"] = kwargs.pop("temperature")
            payload = {
                "model": model_name,
                "prompt": prompt,
                "stream": False,
                **kwargs,
            }
            if options:
                payload["options"] = options
            response = requests.post(f"{self.base_url}/api/generate", json=payload, timeout=120)
            response.raise_for_status()
            data = response.json()
            return {
                "status": "success",
                "response": data.get('response', ''),
                "model": model_name,
                # NOTE(review): Ollama reports token stats as eval_count /
                # prompt_eval_count, not a "usage" key; kept for backward
                # compatibility with existing callers (usually {}).
                "usage": data.get('usage', {}),
            }
        except Exception as e:
            logging.error(f"Error generating response: {e}")
            return {"status": "error", "message": str(e)}

    def health_check(self) -> Dict[str, Any]:
        """Check the health of the Ollama API.

        Returns {"status": "healthy", ...} when /api/tags responds OK,
        otherwise {"status": "unhealthy", "error": ...}.
        """
        try:
            response = requests.get(f"{self.base_url}/api/tags", timeout=10)
            response.raise_for_status()
            return {"status": "healthy", "available_models": len(self.available_models)}
        except Exception as e:
            logging.error(f"Health check failed: {e}")
            return {"status": "unhealthy", "error": str(e)}
# Initialize Ollama manager.
# NOTE: this runs at import time and performs a network call
# (OllamaManager.__init__ -> refresh_models -> requests.get).
ollama_manager = OllamaManager(OLLAMA_BASE_URL)
# HTML template for the chat interface
HTML_TEMPLATE = '''
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>OpenWebUI - Ollama Chat</title>
<style>
* {
margin: 0;
padding: 0;
box-sizing: border-box;
}
body {
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
min-height: 100vh;
padding: 20px;
}
.container {
max-width: 1200px;
margin: 0 auto;
background: white;
border-radius: 20px;
box-shadow: 0 20px 40px rgba(0,0,0,0.1);
overflow: hidden;
}
.header {
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
color: white;
padding: 30px;
text-align: center;
}
.header h1 {
font-size: 2.5rem;
margin-bottom: 10px;
font-weight: 700;
}
.header p {
font-size: 1.1rem;
opacity: 0.9;
}
.controls {
padding: 20px 30px;
background: #f8f9fa;
border-bottom: 1px solid #e9ecef;
display: flex;
gap: 15px;
align-items: center;
flex-wrap: wrap;
}
.control-group {
display: flex;
align-items: center;
gap: 8px;
}
.control-group label {
font-weight: 600;
color: #495057;
min-width: 80px;
}
.control-group select,
.control-group input {
padding: 8px 12px;
border: 2px solid #e9ecef;
border-radius: 8px;
font-size: 14px;
transition: border-color 0.3s;
}
.control-group select:focus,
.control-group input:focus {
outline: none;
border-color: #667eea;
}
.chat-container {
height: 500px;
overflow-y: auto;
padding: 20px;
background: #fafbfc;
}
.message {
margin-bottom: 20px;
display: flex;
gap: 15px;
}
.message.user {
flex-direction: row-reverse;
}
.message-avatar {
width: 40px;
height: 40px;
border-radius: 50%;
display: flex;
align-items: center;
justify-content: center;
font-weight: bold;
color: white;
flex-shrink: 0;
}
.message.user .message-avatar {
background: #667eea;
}
.message.assistant .message-avatar {
background: #28a745;
}
.message-content {
background: white;
padding: 15px 20px;
border-radius: 18px;
max-width: 70%;
box-shadow: 0 2px 10px rgba(0,0,0,0.1);
line-height: 1.5;
}
.message.user .message-content {
background: #667eea;
color: white;
}
.message.assistant .message-content {
background: white;
color: #333;
}
.input-container {
padding: 20px 30px;
background: white;
border-top: 1px solid #e9ecef;
}
.input-form {
display: flex;
gap: 15px;
}
.input-field {
flex: 1;
padding: 15px 20px;
border: 2px solid #e9ecef;
border-radius: 25px;
font-size: 16px;
transition: border-color 0.3s;
resize: none;
min-height: 50px;
max-height: 120px;
}
.input-field:focus {
outline: none;
border-color: #667eea;
}
.send-button {
padding: 15px 30px;
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
color: white;
border: none;
border-radius: 25px;
font-size: 16px;
font-weight: 600;
cursor: pointer;
transition: transform 0.2s;
min-width: 100px;
}
.send-button:hover {
transform: translateY(-2px);
}
.send-button:disabled {
opacity: 0.6;
cursor: not-allowed;
transform: none;
}
.status {
text-align: center;
padding: 10px;
font-size: 14px;
color: #6c757d;
}
.status.error {
color: #dc3545;
}
.status.success {
color: #28a745;
}
.typing-indicator {
display: none;
padding: 15px 20px;
background: white;
border-radius: 18px;
color: #6c757d;
font-style: italic;
}
@media (max-width: 768px) {
.controls {
flex-direction: column;
align-items: stretch;
}
.control-group {
justify-content: space-between;
}
.message-content {
max-width: 85%;
}
}
</style>
</head>
<body>
<div class="container">
<div class="header">
<h1>🤖 OpenWebUI</h1>
<p>Chat with your local Ollama models through Hugging Face Spaces</p>
</div>
<div class="controls">
<div class="control-group">
<label for="model-select">Model:</label>
<select id="model-select">
<option value="">Select a model...</option>
</select>
</div>
<div class="control-group">
<label for="temperature">Temperature:</label>
<input type="range" id="temperature" min="0" max="2" step="0.1" value="0.7">
<span id="temp-value">0.7</span>
</div>
<div class="control-group">
<label for="max-tokens">Max Tokens:</label>
<input type="number" id="max-tokens" min="1" max="4096" value="2048">
</div>
</div>
<div class="chat-container" id="chat-container">
<div class="message assistant">
<div class="message-avatar">AI</div>
<div class="message-content">
Hello! I'm your AI assistant powered by Ollama. How can I help you today?
</div>
</div>
</div>
<div class="typing-indicator" id="typing-indicator">
AI is thinking...
</div>
<div class="input-container">
<form class="input-form" id="chat-form">
<textarea
class="input-field"
id="message-input"
placeholder="Type your message here..."
rows="1"
></textarea>
<button type="submit" class="send-button" id="send-button">
Send
</button>
</form>
</div>
<div class="status" id="status"></div>
</div>
<script>
let conversationHistory = [];
document.addEventListener('DOMContentLoaded', function() {
loadModels();
setupEventListeners();
autoResizeTextarea();
});
async function loadModels() {
const modelSelect = document.getElementById('model-select');
modelSelect.innerHTML = '<option value="">Loading models...</option>';
try {
const response = await fetch('/api/models');
const data = await response.json();
modelSelect.innerHTML = '<option value="">Select a model...</option>';
if (data.status === 'success' && data.models.length > 0) {
data.models.forEach(model => {
const option = document.createElement('option');
option.value = model;
option.textContent = model;
if (model === 'gemma-3-270m') {
option.selected = true;
}
modelSelect.appendChild(option);
});
showStatus('Models loaded successfully', 'success');
} else {
modelSelect.innerHTML = '<option value="">No models available</option>';
showStatus('No models available from API', 'error');
}
} catch (error) {
console.error('Error loading models:', error);
modelSelect.innerHTML = '<option value="">No models available</option>';
showStatus('Failed to load models: ' + error.message, 'error');
}
}
function setupEventListeners() {
document.getElementById('chat-form').addEventListener('submit', handleSubmit);
document.getElementById('temperature').addEventListener('input', function() {
document.getElementById('temp-value').textContent = this.value;
});
document.getElementById('message-input').addEventListener('input', autoResizeTextarea);
}
function autoResizeTextarea() {
const textarea = document.getElementById('message-input');
textarea.style.height = 'auto';
textarea.style.height = Math.min(textarea.scrollHeight, 120) + 'px';
}
async function handleSubmit(e) {
e.preventDefault();
const messageInput = document.getElementById('message-input');
const message = messageInput.value.trim();
if (!message) return;
const model = document.getElementById('model-select').value;
const temperature = parseFloat(document.getElementById('temperature').value);
const maxTokens = parseInt(document.getElementById('max-tokens').value);
if (!model) {
showStatus('Please select a model', 'error');
return;
}
addMessage(message, 'user');
messageInput.value = '';
autoResizeTextarea();
showTypingIndicator(true);
try {
const response = await fetch('/api/chat', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ model, prompt: message, temperature, max_tokens: maxTokens })
});
const data = await response.json();
showTypingIndicator(false);
if (data.status === 'success') {
addMessage(data.response, 'assistant');
showStatus(`Response generated using ${model}`, 'success');
} else {
addMessage('Sorry, I encountered an error while processing your request.', 'assistant');
showStatus(`Error: ${data.message}`, 'error');
}
} catch (error) {
showTypingIndicator(false);
addMessage('Sorry, I encountered a network error.', 'assistant');
showStatus('Network error: ' + error.message, 'error');
}
}
function addMessage(content, sender) {
const chatContainer = document.getElementById('chat-container');
const messageDiv = document.createElement('div');
messageDiv.className = `message ${sender}`;
const avatar = document.createElement('div');
avatar.className = 'message-avatar';
avatar.textContent = sender === 'user' ? 'U' : 'AI';
const messageContent = document.createElement('div');
messageContent.className = 'message-content';
messageContent.textContent = content;
messageDiv.appendChild(avatar);
messageDiv.appendChild(messageContent);
chatContainer.appendChild(messageDiv);
chatContainer.scrollTop = chatContainer.scrollHeight;
conversationHistory.push({ role: sender, content: content });
}
function showTypingIndicator(show) {
const indicator = document.getElementById('typing-indicator');
indicator.style.display = show ? 'block' : 'none';
if (show) {
const chatContainer = document.getElementById('chat-container');
chatContainer.scrollTop = chatContainer.scrollHeight;
}
}
function showStatus(message, type = '') {
const statusDiv = document.getElementById('status');
statusDiv.textContent = message;
statusDiv.className = `status ${type}`;
setTimeout(() => {
statusDiv.textContent = '';
statusDiv.className = 'status';
}, 5000);
}
</script>
</body>
</html>
'''
@app.route('/')
def home():
    """Serve the single-page chat UI.

    The template contains no Jinja placeholders, so the context values
    below are currently unused by the rendered page.
    """
    context = {
        "ollama_base_url": OLLAMA_BASE_URL,
        "default_model": ALLOWED_MODELS,
    }
    return render_template_string(HTML_TEMPLATE, **context)
@app.route('/api/chat', methods=['POST'])
def chat():
    """Chat API endpoint.

    Expects JSON: {"model": str, "prompt": str,
                   "temperature": float (optional),
                   "max_tokens": int (optional)}.
    Returns the OllamaManager.generate() result as JSON; 400 on bad input,
    500 on generation failure.
    """
    try:
        # silent=True: a missing/wrong Content-Type or malformed body yields
        # None instead of an automatic Flask error, so we can return the
        # intended 400 payload uniformly.
        data = request.get_json(silent=True)
        if not data or 'prompt' not in data or 'model' not in data:
            return jsonify({"status": "error", "message": "Prompt and model are required"}), 400
        prompt = data['prompt']
        model = data['model']
        if not isinstance(prompt, str) or not prompt.strip():
            return jsonify({"status": "error", "message": "Prompt must be a non-empty string"}), 400
        # Coerce client-supplied sampling params; reject non-numeric values
        # as a client error instead of letting them bubble up as a 500.
        try:
            temperature = float(data.get('temperature', TEMPERATURE))
            max_tokens = int(data.get('max_tokens', MAX_TOKENS))
        except (TypeError, ValueError):
            return jsonify({"status": "error", "message": "temperature and max_tokens must be numeric"}), 400
        result = ollama_manager.generate(model, prompt, temperature=temperature, max_tokens=max_tokens)
        return jsonify(result), 200 if result["status"] == "success" else 500
    except Exception as e:
        logging.error(f"Chat endpoint error: {e}")
        return jsonify({"status": "error", "message": str(e)}), 500
@app.route('/api/models', methods=['GET'])
def get_models():
    """Return the models currently cached by the Ollama manager."""
    try:
        available = ollama_manager.list_models()
        response_body = {
            "status": "success",
            "models": available,
            "count": len(available),
        }
        return jsonify(response_body)
    except Exception as e:
        logging.error(f"Models endpoint error: {e}")
        return jsonify({"status": "error", "message": str(e)}), 500
@app.route('/health', methods=['GET'])
def health_check():
    """Health check endpoint.

    Mirrors the upstream Ollama API state: returns HTTP 200 with
    "healthy" only when ollama_manager.health_check() reports healthy,
    otherwise 503 — so load balancers and monitors see the degradation
    (the previous behavior always answered 200/"healthy").
    """
    try:
        ollama_health = ollama_manager.health_check()
        upstream_ok = ollama_health.get("status") == "healthy"
        body = {
            "status": "healthy" if upstream_ok else "unhealthy",
            "ollama_api": ollama_health,
            "timestamp": time.time(),
        }
        return jsonify(body), 200 if upstream_ok else 503
    except Exception as e:
        logging.error(f"Health check endpoint error: {e}")
        return jsonify({
            "status": "unhealthy",
            "error": str(e),
            "timestamp": time.time()
        }), 500
if __name__ == '__main__':
    # Bind on all interfaces; port 7860 is the Hugging Face Spaces convention.
    app.run(host='0.0.0.0', port=7860, debug=False)