from fastapi import FastAPI
from pydantic import BaseModel
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

app = FastAPI()

# Load model & tokenizer
MODEL_PATH = "./"  # since it's inside the same repo
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_PATH,
    torch_dtype=torch.float16,
    device_map="auto",
)


class RequestBody(BaseModel):
    prompt: str
    max_length: int = 100  # note: max_length counts prompt tokens too


@app.post("/generate")
def generate_text(req: RequestBody):
    # Tokenize the prompt and move the tensors to the model's device
    inputs = tokenizer(req.prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(**inputs, max_length=req.max_length)
    text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return {"generated_text": text}


@app.get("/")
def root():
    return {"message": "FastAPI Hugging Face Space is running!"}
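
# --- Optional local entry point ---
# A minimal sketch for running the server directly (assumes `uvicorn` is
# installed; port 7860 is the convention for Docker-based Hugging Face
# Spaces, adjust as needed):
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=7860)

# Example request once the server is up (hypothetical prompt and values):
#   curl -X POST http://localhost:7860/generate \
#        -H "Content-Type: application/json" \
#        -d '{"prompt": "Once upon a time", "max_length": 60}'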