import gradio as gr
import subprocess
import requests
import time
import logging

from langchain_community.llms import Ollama
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Cache for loaded models
loaded_models = {}

def check_ollama_running():
    """Wait until the Ollama server is fully ready."""
    url = "http://127.0.0.1:11434/api/tags"
    for _ in range(10):  # Poll for ~20 seconds (10 attempts, 2 s apart)
        try:
            response = requests.get(url, timeout=2)
            if response.status_code == 200:
                logger.info("Ollama is running.")
                return True
        except requests.exceptions.RequestException:
            logger.warning("Waiting for Ollama to start...")
        time.sleep(2)
    raise RuntimeError("Ollama is not running. Please check the server.")

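# Optional sketch (commented out, not used by this app): if the Ollama server is not
# already running, it could be started in the background before polling. This assumes
# the `ollama` binary is on PATH; adjust for your deployment.
#
# def start_ollama_server():
#     """Start `ollama serve` as a background process and wait until it responds."""
#     subprocess.Popen(["ollama", "serve"])
#     check_ollama_running()
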
def pull_model(model_name):
    """Ensure the model is available before use."""
    if model_name in loaded_models:
        logger.info(f"Model {model_name} is already loaded.")
        return
    try:
        subprocess.run(["ollama", "pull", model_name], check=True)
        logger.info(f"Model {model_name} pulled successfully.")
        loaded_models[model_name] = True
    except subprocess.CalledProcessError as e:
        logger.error(f"Failed to pull model {model_name}: {e}")
        raise

def get_llm(model_name):
    """Get an LLM instance with streaming enabled."""
    callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
    return Ollama(
        model=model_name,
        base_url="http://127.0.0.1:11434",
        callback_manager=callback_manager,
    )

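# Quick sanity check (commented out so it does not run at import time): a one-off,
# non-streaming call through LangChain's standard Runnable interface. The model tag
# below is just an example.
#
# llm = get_llm("mistral:7b")
# print(llm.invoke("Say hello in one sentence."))
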
def query_model(model_name, prompt):
    """Generate responses from the model with streaming."""
    check_ollama_running()     # Ensure Ollama is ready
    pull_model(model_name)     # Make sure the model is available
    llm = get_llm(model_name)  # Build the LLM client
    response = ""
    for token in llm.stream(prompt):
        response += token
        yield response  # Yield the accumulated text so Gradio updates in real time

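# Example of consuming the generator outside Gradio (commented out): each yielded
# value is the full response so far, which is the shape Gradio expects for live
# output updates.
#
# for partial in query_model("mistral:7b", "Why is the sky blue?"):
#     print(partial)
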
# Define Gradio interface
iface = gr.Interface(
    fn=query_model,
    inputs=[
        gr.Dropdown(["deepseek-r1:1.5b", "mistral:7b"], label="Select Model"),
        gr.Textbox(label="Enter your prompt"),
    ],
    outputs="text",
    title="Ollama via LangChain & Gradio",
    description="Enter a prompt to interact with the Ollama-based model and get a streaming response.",
    flagging_dir="/app/flagged",
)

if __name__ == "__main__":
    iface.launch(server_name="0.0.0.0", server_port=7860)
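
# Programmatic access sketch (commented out; assumes the gradio_client package is
# installed and that the default API endpoint is named "/predict", which may differ
# depending on the Gradio version):
#
# from gradio_client import Client
# client = Client("http://127.0.0.1:7860")
# print(client.predict("mistral:7b", "Hello!", api_name="/predict"))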