# D-PII-Study / app.py
import gradio as gr
import subprocess
import requests
import time
import logging
from langchain_community.llms import Ollama
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# Cache for loaded models
loaded_models = {}
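# NOTE: this app assumes an Ollama server is already listening on 127.0.0.1:11434
# (for example, started by the container's entrypoint with `ollama serve`);
# check_ollama_running() below only polls that endpoint, it does not start the server.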
def check_ollama_running():
    """Wait until the Ollama server is fully ready."""
    url = "http://127.0.0.1:11434/api/tags"
    for _ in range(10):  # Poll for up to ~20 seconds (10 attempts, 2 s apart)
        try:
            response = requests.get(url, timeout=2)
            if response.status_code == 200:
                logger.info("Ollama is running.")
                return True
        except requests.exceptions.RequestException:
            logger.warning("Waiting for Ollama to start...")
        time.sleep(2)
    raise RuntimeError("Ollama is not running. Please check the server.")
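# pull_model() below shells out to the `ollama` CLI; the first pull of a model such as
# mistral:7b can take several minutes, and the result is cached in loaded_models so the
# pull only happens once per process.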
def pull_model(model_name):
    """Ensure the model is available before use."""
    if model_name in loaded_models:
        logger.info(f"Model {model_name} is already loaded.")
        return
    try:
        subprocess.run(["ollama", "pull", model_name], check=True)
        logger.info(f"Model {model_name} pulled successfully.")
        loaded_models[model_name] = True
    except subprocess.CalledProcessError as e:
        logger.error(f"Failed to pull model {model_name}: {e}")
        raise
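# get_llm() below attaches a StreamingStdOutCallbackHandler, which only mirrors generated
# tokens to the process stdout (handy in the Space logs); the text shown in the UI comes
# from consuming llm.stream(...) in query_model().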
def get_llm(model_name):
    """Get an LLM instance with streaming enabled."""
    callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
    return Ollama(model=model_name, base_url="http://127.0.0.1:11434", callback_manager=callback_manager)
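# query_model() below is a generator: gr.Interface streams each yielded value to the
# output component, so the textbox fills in incrementally as tokens arrive.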
def query_model(model_name, prompt):
    """Generate responses from the model with streaming."""
    check_ollama_running()   # Ensure Ollama is ready
    pull_model(model_name)   # Make sure the model is available
    llm = get_llm(model_name)  # Load the model
    response = ""
    for token in llm.stream(prompt):
        response += token
        yield response  # Stream the accumulated response in real time
# Define Gradio interface
iface = gr.Interface(
    fn=query_model,
    inputs=[
        gr.Dropdown(["deepseek-r1:1.5b", "mistral:7b"], label="Select Model"),
        gr.Textbox(label="Enter your prompt"),
    ],
    outputs="text",
    title="Ollama via LangChain & Gradio",
    description="Enter a prompt to interact with the Ollama-based model with streaming response.",
    flagging_dir="/app/flagged",
)
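# Depending on the installed Gradio version, generator-based streaming may need an
# explicit queue (e.g. iface.queue()) before launch; recent versions enable it by default.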
if __name__ == "__main__":
    iface.launch(server_name="0.0.0.0", server_port=7860)