from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from langchain_community.llms import Ollama  # community LangChain wrapper around a local Ollama server
import logging
import time  # used for the "created" timestamp in responses

# Configure logging
logging.basicConfig(level=logging.INFO)

app = FastAPI()

# OpenAI-compatible request format
class OpenAIRequest(BaseModel):
    model: str
    messages: list  # list of {"role": ..., "content": ...} dicts
    stream: bool = False  # accepted for compatibility, but streaming is not implemented

# Initialize LangChain LLM with Ollama
def get_llm(model_name: str):
    return Ollama(model=model_name)


@app.get("/")
def home():
    return {"message": "OpenAI-compatible LangChain + Ollama API is running"}
@app.post("/v1/chat/completions")
def generate_text(request: OpenAIRequest):
    try:
        llm = get_llm(request.model)

        # Extract last user message from messages
        user_message = next((msg["content"] for msg in reversed(request.messages) if msg["role"] == "user"), None)
        if not user_message:
            raise HTTPException(status_code=400, detail="User message is required")

        # Only the latest user message is sent to the model; earlier history is ignored
        response_text = llm.invoke(user_message)

        # OpenAI-like response format
        response = {
            "id": "chatcmpl-123",  # static placeholder id, not a real unique completion id
            "object": "chat.completion",
            "created": int(time.time()),
            "model": request.model,
            "choices": [
                {
                    "index": 0,
                    "message": {"role": "assistant", "content": response_text},
                    "finish_reason": "stop",
                }
            ],
            "usage": {
                "prompt_tokens": len(user_message.split()),
                "completion_tokens": len(response_text.split()),
                "total_tokens": len(user_message.split()) + len(response_text.split()),
            }
        }

        return response

    except HTTPException:
        # Re-raise client errors (e.g. the 400 above) instead of masking them as 500s
        raise
    except Exception as e:
        logging.error(f"Error generating response: {e}")
        raise HTTPException(status_code=500, detail="Internal server error")
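

# A minimal sketch for running the server directly; it assumes uvicorn is installed
# and that this file is named main.py (otherwise use `uvicorn <module>:app --reload`).
# The model name in the example request is an assumption and must match a model
# already pulled in Ollama, e.g. `ollama pull llama3`.
#
# Example request once the server is running:
#   curl http://localhost:8000/v1/chat/completions \
#     -H "Content-Type: application/json" \
#     -d '{"model": "llama3", "messages": [{"role": "user", "content": "Hello"}]}'
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8000)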