redfernstech committed
Commit 4bae573 · verified · 1 Parent(s): ed775de

Upload 4 files

Files changed (4)
  1. .env +2 -0
  2. Dockerfile +34 -25
  3. main.py +85 -0
  4. requirements.txt +8 -0
.env ADDED
@@ -0,0 +1,2 @@
+ API_KEY_USER1=your-secure-key-1
+ API_KEY_USER2=your-secure-key-2
Dockerfile CHANGED
@@ -1,25 +1,34 @@
- FROM ollama/ollama:latest
-
- # Install Python and pip
- RUN apt update && apt install -y python3 python3-pip
-
- # Install litellm and its proxy dependencies
- RUN pip install 'litellm[proxy]'
-
- # Create a directory for Ollama data
- RUN mkdir -p /.ollama && chmod -R 777 /.ollama
-
- WORKDIR /.ollama
-
- # Copy the entry point script
- COPY entrypoint.sh /entrypoint.sh
- RUN chmod +x /entrypoint.sh
-
- # Set the entry point script as the default command
- ENTRYPOINT ["/entrypoint.sh"]
-
- # Expose the port that Ollama runs on
- EXPOSE 7860
-
- # Set the model name as an environment variable (this can be overridden)
- ENV MODEL_NAME=your_model_name_here
+ # Use Ubuntu as the base image
+ FROM ubuntu:22.04
+
+ # Set the working directory in the container
+ WORKDIR /app
+
+ # Install system dependencies and Python
+ RUN apt-get update && apt-get install -y \
+     python3 \
+     python3-pip \
+     curl \
+     && rm -rf /var/lib/apt/lists/*
+
+ # Set Python3 as the default
+ RUN ln -s /usr/bin/python3 /usr/bin/python
+
+ # Copy the requirements file and install dependencies
+ COPY requirements.txt ./
+ RUN pip install --no-cache-dir -r requirements.txt
+
+ # Install Ollama
+ RUN curl -fsSL https://ollama.com/install.sh | bash
+
+ # Ensure Ollama is in the system path
+ ENV PATH="/root/.ollama/bin:$PATH"
+
+ # Copy the application files
+ COPY . .
+
+ # Expose the FastAPI default port
+ EXPOSE 8000
+
+ # Start Ollama, pull Llama3 if not present, then start FastAPI
+ CMD ["sh", "-c", "ollama serve & sleep 5 && ollama pull llama3 && uvicorn main:app --host 0.0.0.0 --port 8000"]
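Once this image is built and running with port 8000 published, a quick way to confirm the service came up is to hit the root route defined in main.py. The snippet below is only a sketch, not part of this commit: the localhost:8000 address and the check_service.py filename are assumptions, and it relies on the requests package already listed in requirements.txt.

# check_service.py -- hypothetical smoke test, not included in this commit.
# Assumes the container built from this Dockerfile is running locally with
# port 8000 published (e.g. mapped to localhost:8000).
import requests

def check_service(base_url: str = "http://localhost:8000") -> None:
    # The "/" route in main.py returns a small JSON status message.
    resp = requests.get(f"{base_url}/", timeout=10)
    resp.raise_for_status()
    print(resp.json())  # expected: {"message": "OpenAI-compatible LangChain + Ollama API is running"}

if __name__ == "__main__":
    check_service()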
main.py ADDED
@@ -0,0 +1,85 @@
+ from fastapi import FastAPI, HTTPException, Depends, Header, Request
+ from pydantic import BaseModel
+ from langchain_community.llms import Ollama  # Correct Import
+ import os
+ import logging
+ import time  # Import time module
+ from dotenv import load_dotenv
+
+ # Load environment variables
+ load_dotenv()
+
+ # Configure logging
+ logging.basicConfig(level=logging.INFO)
+
+ # API keys from .env
+ API_KEYS = {
+     "user1": os.getenv("API_KEY_USER1"),
+     "user2": os.getenv("API_KEY_USER2"),
+ }
+
+ app = FastAPI()
+
+ # API Key Authentication
+ def verify_api_key(request: Request, api_key: str = Header(None, alias="X-API-Key")):
+     logging.info(f"Received Headers: {request.headers}")  # Log headers
+     if not api_key:
+         raise HTTPException(status_code=401, detail="API key is missing")
+
+     api_key = api_key.strip()
+     if api_key not in API_KEYS.values():
+         raise HTTPException(status_code=401, detail="Invalid API key")
+
+     return api_key
+
+ # OpenAI-compatible request format
+ class OpenAIRequest(BaseModel):
+     model: str
+     messages: list
+     stream: bool = False  # Default to non-streaming
+
+ # Initialize LangChain LLM with Ollama
+ def get_llm(model_name: str):
+     return Ollama(model=model_name)
+
+ @app.get("/")
+ def home():
+     return {"message": "OpenAI-compatible LangChain + Ollama API is running"}
+
+ @app.post("/v1/chat/completions")
+ def generate_text(request: OpenAIRequest, api_key: str = Depends(verify_api_key)):
+     try:
+         llm = get_llm(request.model)
+
+         # Extract last user message from messages
+         user_message = next((msg["content"] for msg in reversed(request.messages) if msg["role"] == "user"), None)
+         if not user_message:
+             raise HTTPException(status_code=400, detail="User message is required")
+
+         response_text = llm.invoke(user_message)
+
+         # OpenAI-like response format
+         response = {
+             "id": "chatcmpl-123",
+             "object": "chat.completion",
+             "created": int(time.time()),  # FIXED: Using time.time() instead of os.time()
+             "model": request.model,
+             "choices": [
+                 {
+                     "index": 0,
+                     "message": {"role": "assistant", "content": response_text},
+                     "finish_reason": "stop",
+                 }
+             ],
+             "usage": {
+                 "prompt_tokens": len(user_message.split()),
+                 "completion_tokens": len(response_text.split()),
+                 "total_tokens": len(user_message.split()) + len(response_text.split()),
+             }
+         }
+
+         return response
+
+     except Exception as e:
+         logging.error(f"Error generating response: {e}")
+         raise HTTPException(status_code=500, detail="Internal server error")
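For reference, a minimal client call against the /v1/chat/completions route might look like the sketch below. It is not part of this commit: the base URL, the placeholder key value, and the choice of llama3 (the model pulled in the Dockerfile's CMD) are assumptions; the X-API-Key header name and the request/response shapes come from main.py above.

# client_example.py -- hypothetical usage sketch, not included in this commit.
# Assumes the FastAPI server is reachable at localhost:8000 and that the
# X-API-Key header carries one of the values configured in .env.
import requests

API_URL = "http://localhost:8000/v1/chat/completions"
API_KEY = "your-secure-key-1"  # placeholder; substitute a real key from .env

payload = {
    "model": "llama3",  # model pulled by the Dockerfile's CMD
    "messages": [{"role": "user", "content": "Say hello in one sentence."}],
    "stream": False,  # the endpoint only returns non-streaming responses
}

resp = requests.post(API_URL, json=payload, headers={"X-API-Key": API_KEY}, timeout=120)
resp.raise_for_status()
print(resp.json()["choices"][0]["message"]["content"])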
requirements.txt ADDED
@@ -0,0 +1,8 @@
+ fastapi
+ uvicorn
+ openai
+ langchain
+ requests
+ langchain_community
+ python-dotenv
+ cloudflared