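"""FastAPI inference service: a 4-bit quantized Llama 3 8B Instruct base model with the DirectEd AI LoRA adapter applied on top."""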
from fastapi import FastAPI
from pydantic import BaseModel
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from peft import PeftModel
# --- Config ---
BASE_MODEL = "unsloth/llama-3-8b-Instruct-bnb-4bit"
FINETUNED_ADAPTER = "rayymaxx/DirectEd-AI-LoRA"
MAX_NEW_TOKENS = 200
app = FastAPI(title="Directed AI FastAPI")
# --- Load model & tokenizer once at startup ---
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
# 4-bit quantized base model, dispatched to the available device(s) by accelerate
base_model = AutoModelForCausalLM.from_pretrained(BASE_MODEL, device_map="auto")
# Attach the DirectEd LoRA adapter weights on top of the base model
model = PeftModel.from_pretrained(base_model, FINETUNED_ADAPTER)
# The model is already placed via device_map above, so the pipeline reuses that placement
text_generator = pipeline("text-generation", model=model, tokenizer=tokenizer)
class Prompt(BaseModel):
    prompt: str
@app.post("/generate")
def generate_text(prompt_data: Prompt):
    prompt_text = prompt_data.prompt
    output = text_generator(prompt_text, max_new_tokens=MAX_NEW_TOKENS, do_sample=True, temperature=0.7)
    # By default the text-generation pipeline returns the prompt followed by the generated continuation
    return {"response": output[0]["generated_text"]}
@app.get("/")
def greet_json():
    return {"Hello": "World!"}