from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from langchain_community.llms import Ollama  # correct import path for the Ollama wrapper
import logging
import time  # used for the `created` timestamp in the response

# Configure logging
logging.basicConfig(level=logging.INFO)

app = FastAPI()


# OpenAI-compatible request format
class OpenAIRequest(BaseModel):
    model: str
    messages: list
    stream: bool = False  # Default to non-streaming


# Initialize a LangChain LLM backed by Ollama
def get_llm(model_name: str):
    return Ollama(model=model_name)


@app.get("/")
def home():
    return {"message": "OpenAI-compatible LangChain + Ollama API is running"}


@app.post("/v1/chat/completions")
def generate_text(request: OpenAIRequest):
    try:
        llm = get_llm(request.model)

        # Extract the last user message from the conversation history
        user_message = next(
            (msg["content"] for msg in reversed(request.messages) if msg["role"] == "user"),
            None,
        )
        if not user_message:
            raise HTTPException(status_code=400, detail="User message is required")

        response_text = llm.invoke(user_message)

        # OpenAI-like response format
        response = {
            "id": "chatcmpl-123",
            "object": "chat.completion",
            "created": int(time.time()),
            "model": request.model,
            "choices": [
                {
                    "index": 0,
                    "message": {"role": "assistant", "content": response_text},
                    "finish_reason": "stop",
                }
            ],
            "usage": {
                "prompt_tokens": len(user_message.split()),
                "completion_tokens": len(response_text.split()),
                "total_tokens": len(user_message.split()) + len(response_text.split()),
            },
        }
        return response

    except HTTPException:
        # Re-raise client errors (e.g. the 400 above) instead of masking them as 500s
        raise
    except Exception as e:
        logging.error(f"Error generating response: {e}")
        raise HTTPException(status_code=500, detail="Internal server error")
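

# ---------------------------------------------------------------------------
# Usage sketch (not part of the snippet above; treat it as an optional add-on).
# Assumptions: this file is saved as main.py, uvicorn and the openai client are
# installed, Ollama is running locally, and a model such as "llama3" has already
# been pulled (e.g. `ollama pull llama3`).
if __name__ == "__main__":
    import uvicorn

    # Serve the OpenAI-compatible API on localhost:8000
    uvicorn.run(app, host="0.0.0.0", port=8000)

# With the server running, any OpenAI-compatible client can be pointed at it,
# for example:
#
#     from openai import OpenAI
#
#     client = OpenAI(base_url="http://localhost:8000/v1", api_key="not-needed")
#     reply = client.chat.completions.create(
#         model="llama3",  # example model name; use any model available in your local Ollama
#         messages=[{"role": "user", "content": "Say hello in one sentence."}],
#     )
#     print(reply.choices[0].message.content)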