redfernstech committed
Commit 4bae573 · verified · 1 Parent(s): ed775de

Upload 4 files

Files changed (4)
  1. .env +2 -0
  2. Dockerfile +34 -25
  3. main.py +85 -0
  4. requirements.txt +8 -0
.env ADDED
@@ -0,0 +1,2 @@
+ API_KEY_USER1=your-secure-key-1
+ API_KEY_USER2=your-secure-key-2
Dockerfile CHANGED
@@ -1,25 +1,34 @@
- FROM ollama/ollama:latest
-
- # Install Python and pip
- RUN apt update && apt install -y python3 python3-pip
-
- # Install litellm and its proxy dependencies
- RUN pip install 'litellm[proxy]'
-
- # Create a directory for Ollama data
- RUN mkdir -p /.ollama && chmod -R 777 /.ollama
-
- WORKDIR /.ollama
-
- # Copy the entry point script
- COPY entrypoint.sh /entrypoint.sh
- RUN chmod +x /entrypoint.sh
-
- # Set the entry point script as the default command
- ENTRYPOINT ["/entrypoint.sh"]
-
- # Expose the port that Ollama runs on
- EXPOSE 7860
-
- # Set the model name as an environment variable (this can be overridden)
- ENV MODEL_NAME=your_model_name_here
+ # Use Ubuntu as the base image
+ FROM ubuntu:22.04
+
+ # Set the working directory in the container
+ WORKDIR /app
+
+ # Install system dependencies and Python
+ RUN apt-get update && apt-get install -y \
+     python3 \
+     python3-pip \
+     curl \
+     && rm -rf /var/lib/apt/lists/*
+
+ # Set Python3 as the default
+ RUN ln -s /usr/bin/python3 /usr/bin/python
+
+ # Copy the requirements file and install dependencies
+ COPY requirements.txt ./
+ RUN pip install --no-cache-dir -r requirements.txt
+
+ # Install Ollama
+ RUN curl -fsSL https://ollama.com/install.sh | bash
+
+ # Ensure Ollama is in the system path
+ ENV PATH="/root/.ollama/bin:$PATH"
+
+ # Copy the application files
+ COPY . .
+
+ # Expose the FastAPI default port
+ EXPOSE 8000
+
+ # Start Ollama, pull Llama3 if not present, then start FastAPI
+ CMD ["sh", "-c", "ollama serve & sleep 5 && ollama pull llama3 && uvicorn main:app --host 0.0.0.0 --port 8000"]
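Once this image is built and running with port 8000 published, a quick way to confirm the service came up is to hit the root route defined in main.py. The snippet below is only a sketch, not part of this commit: the localhost:8000 address and the check_service.py filename are assumptions, and it relies on the requests package already listed in requirements.txt.

# check_service.py -- hypothetical smoke test, not included in this commit.
# Assumes the container built from this Dockerfile is running locally with
# port 8000 published (e.g. mapped to localhost:8000).
import requests

def check_service(base_url: str = "http://localhost:8000") -> None:
    # The "/" route in main.py returns a small JSON status message.
    resp = requests.get(f"{base_url}/", timeout=10)
    resp.raise_for_status()
    print(resp.json())  # expected: {"message": "OpenAI-compatible LangChain + Ollama API is running"}

if __name__ == "__main__":
    check_service()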
main.py ADDED
@@ -0,0 +1,85 @@
+ from fastapi import FastAPI, HTTPException, Depends, Header, Request
+ from pydantic import BaseModel
+ from langchain_community.llms import Ollama  # Correct Import
+ import os
+ import logging
+ import time  # Import time module
+ from dotenv import load_dotenv
+
+ # Load environment variables
+ load_dotenv()
+
+ # Configure logging
+ logging.basicConfig(level=logging.INFO)
+
+ # API keys from .env
+ API_KEYS = {
+     "user1": os.getenv("API_KEY_USER1"),
+     "user2": os.getenv("API_KEY_USER2"),
+ }
+
+ app = FastAPI()
+
+ # API Key Authentication
+ def verify_api_key(request: Request, api_key: str = Header(None, alias="X-API-Key")):
+     logging.info(f"Received Headers: {request.headers}")  # Log headers
+     if not api_key:
+         raise HTTPException(status_code=401, detail="API key is missing")
+
+     api_key = api_key.strip()
+     if api_key not in API_KEYS.values():
+         raise HTTPException(status_code=401, detail="Invalid API key")
+
+     return api_key
+
+ # OpenAI-compatible request format
+ class OpenAIRequest(BaseModel):
+     model: str
+     messages: list
+     stream: bool = False  # Default to non-streaming
+
+ # Initialize LangChain LLM with Ollama
+ def get_llm(model_name: str):
+     return Ollama(model=model_name)
+
+ @app.get("/")
+ def home():
+     return {"message": "OpenAI-compatible LangChain + Ollama API is running"}
+
+ @app.post("/v1/chat/completions")
+ def generate_text(request: OpenAIRequest, api_key: str = Depends(verify_api_key)):
+     try:
+         llm = get_llm(request.model)
+
+         # Extract last user message from messages
+         user_message = next((msg["content"] for msg in reversed(request.messages) if msg["role"] == "user"), None)
+         if not user_message:
+             raise HTTPException(status_code=400, detail="User message is required")
+
+         response_text = llm.invoke(user_message)
+
+         # OpenAI-like response format
+         response = {
+             "id": "chatcmpl-123",
+             "object": "chat.completion",
+             "created": int(time.time()),  # FIXED: Using time.time() instead of os.time()
+             "model": request.model,
+             "choices": [
+                 {
+                     "index": 0,
+                     "message": {"role": "assistant", "content": response_text},
+                     "finish_reason": "stop",
+                 }
+             ],
+             "usage": {
+                 "prompt_tokens": len(user_message.split()),
+                 "completion_tokens": len(response_text.split()),
+                 "total_tokens": len(user_message.split()) + len(response_text.split()),
+             }
+         }
+
+         return response
+
+     except Exception as e:
+         logging.error(f"Error generating response: {e}")
+         raise HTTPException(status_code=500, detail="Internal server error")
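For reference, a minimal client call against the /v1/chat/completions route might look like the sketch below. It is not part of this commit: the base URL, the placeholder key value, and the choice of llama3 (the model pulled in the Dockerfile's CMD) are assumptions; the X-API-Key header name and the request/response shapes come from main.py above.

# client_example.py -- hypothetical usage sketch, not included in this commit.
# Assumes the FastAPI server is reachable at localhost:8000 and that the
# X-API-Key header carries one of the values configured in .env.
import requests

API_URL = "http://localhost:8000/v1/chat/completions"
API_KEY = "your-secure-key-1"  # placeholder; substitute a real key from .env

payload = {
    "model": "llama3",  # model pulled by the Dockerfile's CMD
    "messages": [{"role": "user", "content": "Say hello in one sentence."}],
    "stream": False,  # the endpoint only returns non-streaming responses
}

resp = requests.post(API_URL, json=payload, headers={"X-API-Key": API_KEY}, timeout=120)
resp.raise_for_status()
print(resp.json()["choices"][0]["message"]["content"])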
requirements.txt ADDED
@@ -0,0 +1,8 @@
+ fastapi
+ uvicorn
+ openai
+ langchain
+ requests
+ langchain_community
+ python-dotenv
+ cloudflared