rayymaxx committed
Commit cce707f · Parent: d89fa1f

Add application file

Files changed (3)
  1. Dockerfile +18 -0
  2. app.py +31 -0
  3. requirements.txt +8 -0
Dockerfile ADDED
@@ -0,0 +1,18 @@
+ # Use Python 3.12
+ FROM python:3.12-slim
+
+ # Set working directory
+ WORKDIR /app
+
+ # Copy files
+ COPY . .
+
+ # Install dependencies
+ RUN pip install --upgrade pip
+ RUN pip install -r requirements.txt
+
+ # Expose FastAPI port
+ EXPOSE 7860
+
+ # Run FastAPI
+ CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
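A quick local sanity check of this image might look like the following; the tag directed-ai-api is just an illustrative name, and port 7860 matches the EXPOSE above:

docker build -t directed-ai-api .
docker run -p 7860:7860 directed-ai-api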
app.py ADDED
@@ -0,0 +1,31 @@
+ from fastapi import FastAPI
+ from pydantic import BaseModel
+ from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
+ from peft import PeftModel
+
+ # --- Config ---
+ BASE_MODEL = "unsloth/llama-3-8b-Instruct-bnb-4bit"
+ FINETUNED_ADAPTER = "rayymaxx/DirectEd-AI-LoRA"
+ MAX_NEW_TOKENS = 200
+
+ app = FastAPI(title="Directed AI FastAPI")
+
+ # --- Load model & tokenizer once at startup ---
+ tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
+ base_model = AutoModelForCausalLM.from_pretrained(BASE_MODEL, device_map="auto")
+ model = PeftModel.from_pretrained(base_model, FINETUNED_ADAPTER)
+ text_generator = pipeline("text-generation", model=model, tokenizer=tokenizer)
+
+ class Prompt(BaseModel):
+     prompt: str
+
+ @app.post("/generate")
+ def generate_text(prompt_data: Prompt):
+     prompt_text = prompt_data.prompt
+     output = text_generator(prompt_text, max_new_tokens=MAX_NEW_TOKENS, do_sample=True, temperature=0.7)
+     return {"response": output[0]["generated_text"]}
+
+
+ @app.get("/")
+ def greet_json():
+     return {"Hello": "World!"}
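With the server running, the /generate endpoint can be exercised with any HTTP client; the prompt below is only an example:

curl -X POST http://localhost:7860/generate \
  -H "Content-Type: application/json" \
  -d '{"prompt": "Explain LoRA fine-tuning in one sentence."}'

Note that the text-generation pipeline's generated_text field includes the prompt as a prefix, so the response echoes the input before the completion.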
requirements.txt ADDED
@@ -0,0 +1,8 @@
+ fastapi
+ uvicorn
+ transformers>=4.35.0
+ torch
+ peft
+ trl
+ bitsandbytes  # required to load the bnb-4bit quantized base model
+ accelerate    # required for device_map="auto"
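For development outside Docker, the same requirements support a direct run (assuming a Python 3.12 environment to match the image):

pip install -r requirements.txt
uvicorn app:app --host 0.0.0.0 --port 7860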