import os
from typing import Dict

import dotenv
import uvicorn
from fastapi import FastAPI, HTTPException
from fastapi.responses import JSONResponse, HTMLResponse
from huggingface_hub import hf_hub_download, login
from llama_cpp import Llama
from pydantic import BaseModel

from UofTearsBot import UofTearsBot
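
# Note: UofTearsBot is a local module. From its use below, its interface is
# assumed (not confirmed here) to be roughly:
#   bot = UofTearsBot(llm)        # wraps a shared llama_cpp.Llama handle
#   reply = bot.converse(text)    # returns the model's reply as a string
#   bot.history                   # the accumulated conversation so far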
# GGUF build of Mistral-7B-Instruct, served through llama.cpp
MODEL_REPO = "bartowski/Mistral-7B-Instruct-v0.3-GGUF"
MODEL_FILE = "Mistral-7B-Instruct-v0.3-Q4_K_M.gguf"
CHAT_FORMAT = "mistral-instruct"

# Load HF_TOKEN from .env (or Space secrets) and authenticate with the Hub
dotenv.load_dotenv()
login(token=os.getenv("HF_TOKEN"))
# Download the quantized weights once at startup; MODEL_PATH is the local file path
MODEL_PATH = hf_hub_download(
    repo_id=MODEL_REPO,
    filename=MODEL_FILE,
    local_dir="/tmp/models",
)
llm = Llama(
    model_path=MODEL_PATH,
    n_ctx=int(os.getenv("N_CTX", "4096")),     # context window, overridable via env
    n_threads=os.cpu_count() or 4,             # use all available CPU cores
    n_batch=int(os.getenv("N_BATCH", "256")),  # prompt-evaluation batch size
    chat_format=CHAT_FORMAT,
)
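
# Optional smoke test (a sketch, left commented out): llama-cpp-python's
# create_chat_completion is the call the bot below presumably wraps.
# out = llm.create_chat_completion(
#     messages=[{"role": "user", "content": "Say hi in five words."}],
#     max_tokens=32,
# )
# print(out["choices"][0]["message"]["content"])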
# Start the FastAPI app
app = FastAPI()

# One bot instance per user_id so each user keeps a separate conversation
chatbots: Dict[str, UofTearsBot] = {}
# Request schema for the /chat endpoint
class ChatRequest(BaseModel):
    user_id: str
    user_text: str
@app.post("/chat")
async def chat(request: ChatRequest):
    # Create a bot for first-time users, then route the message to their bot
    if request.user_id not in chatbots:
        chatbots[request.user_id] = UofTearsBot(llm)
    current_bot = chatbots[request.user_id]
    try:
        response = current_bot.converse(request.user_text)
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc)) from exc
    return JSONResponse(content={"response": response, "history": current_bot.history})
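
# Example call once the server is up (user_id and user_text are placeholders):
#   curl -X POST http://localhost:7860/chat \
#     -H "Content-Type: application/json" \
#     -d '{"user_id": "demo-user", "user_text": "Hello!"}'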
@app.get("/", response_class=HTMLResponse)
async def home():
    return "<h1>App is running π</h1>"
if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=7860)  # Hugging Face Spaces serves on port 7860