# DirectEd-AI inference API — FastAPI service wrapping a LoRA-finetuned Llama-3 model.
# (Replaced Hugging Face Spaces page residue — "Spaces: / Sleeping" — left over from extraction.)
from fastapi import FastAPI
from pydantic import BaseModel
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from peft import PeftModel

# --- Config ---
BASE_MODEL = "unsloth/llama-3-8b-Instruct-bnb-4bit"  # 4-bit quantized Llama-3 8B Instruct base
FINETUNED_ADAPTER = "rayymaxx/DirectEd-AI-LoRA"      # LoRA adapter applied on top of the base
MAX_NEW_TOKENS = 200                                 # generation budget per request

app = FastAPI(title="Directed AI FastAPI")

# --- Load model & tokenizer once at startup ---
# Loading here (module import time) means the first request does not pay the
# multi-GB download/initialization cost; device_map="auto" lets accelerate
# place weights on whatever hardware is available.
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
base_model = AutoModelForCausalLM.from_pretrained(BASE_MODEL, device_map="auto")
model = PeftModel.from_pretrained(base_model, FINETUNED_ADAPTER)
text_generator = pipeline("text-generation", model=model, tokenizer=tokenizer, device_map="auto")
class Prompt(BaseModel):
    """Request body for text generation: a single free-text prompt."""

    prompt: str
# NOTE(review): no route decorator survived the page extraction; this function is
# clearly an endpoint (pydantic request model, JSON-able dict response) — restored
# as POST /generate. Confirm the original path before deploying.
@app.post("/generate")
def generate_text(prompt_data: Prompt):
    """Generate a completion for the supplied prompt.

    Args:
        prompt_data: Validated request body carrying the user's prompt text.

    Returns:
        A dict with a single ``"response"`` key holding the text produced by
        the generation pipeline.
    """
    prompt_text = prompt_data.prompt
    # Sampling (do_sample=True, temperature=0.7) gives varied but coherent output;
    # MAX_NEW_TOKENS caps the generation budget per request.
    output = text_generator(
        prompt_text,
        max_new_tokens=MAX_NEW_TOKENS,
        do_sample=True,
        temperature=0.7,
    )
    return {"response": output[0]["generated_text"]}
# NOTE(review): decorator lost in extraction — this is the Hugging Face Spaces
# FastAPI template's root endpoint; restored as GET /. Confirm against the Space.
@app.get("/")
def greet_json():
    """Root endpoint; returns a fixed greeting, usable as a liveness check."""
    return {"Hello": "World!"}