# UofTearsBotAPI / app.py
import os
from typing import Dict

import dotenv
import uvicorn
from fastapi import FastAPI
from fastapi.responses import HTMLResponse, JSONResponse
from huggingface_hub import hf_hub_download, login
from llama_cpp import Llama
from pydantic import BaseModel

from UofTearsBot import UofTearsBot

# Quantized Mistral-7B-Instruct (GGUF) served through llama.cpp.
MODEL_REPO = "bartowski/Mistral-7B-Instruct-v0.3-GGUF"
MODEL_FILE = "Mistral-7B-Instruct-v0.3-Q4_K_M.gguf"
CHAT_FORMAT = "mistral-instruct"

# Load environment variables; only log in to the Hugging Face Hub when a
# token is set (needed for gated or private model repos).
dotenv.load_dotenv()
if os.getenv("HF_TOKEN"):
    login(token=os.getenv("HF_TOKEN"))

# Download the GGUF weights once at startup and cache them under /tmp/models.
MODEL_PATH = hf_hub_download(
    repo_id=MODEL_REPO,
    filename=MODEL_FILE,
    local_dir="/tmp/models",
    local_dir_use_symlinks=False,  # deprecated no-op on newer huggingface_hub
)

# Load the model with llama.cpp; context window and batch size are tunable
# through the N_CTX and N_BATCH environment variables.
llm = Llama(
    model_path=MODEL_PATH,
    n_ctx=int(os.getenv("N_CTX", "4096")),
    n_threads=os.cpu_count() or 4,
    n_batch=int(os.getenv("N_BATCH", "256")),
    chat_format=CHAT_FORMAT,
)
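# A minimal sketch of a direct llama-cpp-python chat call, useful for
# smoke-testing the model outside UofTearsBot (the message content below
# is illustrative, not part of the app):
#   llm.create_chat_completion(
#       messages=[{"role": "user", "content": "Hello!"}],
#       max_tokens=128,
#   )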
# Create the FastAPI app and an in-memory registry of per-user bot sessions.
app = FastAPI()
chatbots: Dict[str, UofTearsBot] = {}


class ChatRequest(BaseModel):
    """Payload for /chat: a stable user_id plus the user's message."""
    user_id: str
    user_text: str


@app.post("/chat")
async def chat(request: ChatRequest):
    # Lazily create one bot (and one conversation history) per user.
    # Sessions live in process memory, so they are lost on restart.
    if request.user_id not in chatbots:
        chatbots[request.user_id] = UofTearsBot(llm)
    current_bot = chatbots[request.user_id]
    response = current_bot.converse(request.user_text)
    return JSONResponse(content={"response": response, "history": current_bot.history})


@app.get("/", response_class=HTMLResponse)
async def home():
return "<h1>App is running πŸš€</h1>"

if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=7860)  # Hugging Face Spaces serves on port 7860
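
# A minimal usage sketch, assuming the server is reachable on localhost:7860
# (the user_id and user_text values below are illustrative):
#   curl -X POST http://localhost:7860/chat \
#        -H "Content-Type: application/json" \
#        -d '{"user_id": "demo-user", "user_text": "Hello!"}'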