Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
@@ -37,9 +37,9 @@ MODEL_PATH = hf_hub_download(
|
|
37 |
|
38 |
llm = Llama(
|
39 |
model_path=MODEL_PATH,
|
40 |
-
n_ctx=int(os.getenv("N_CTX", "
|
41 |
n_threads=os.cpu_count() or 4,
|
42 |
-
n_batch=int(os.getenv("N_BATCH", "
|
43 |
chat_format=CHAT_FORMAT,
|
44 |
)
|
45 |
|
@@ -57,6 +57,7 @@ async def chat(request: ChatRequest):
|
|
57 |
if request.user_id not in chatbots:
|
58 |
chatbots[request.user_id] = UofTearsBot(llm)
|
59 |
current_bot = chatbots[request.user_id]
|
|
|
60 |
response = current_bot.converse(request.user_text)
|
61 |
return JSONResponse(content={"response": response, "history": current_bot.history})
|
62 |
except Exception as e:
|
|
|
37 |
|
38 |
llm = Llama(
|
39 |
model_path=MODEL_PATH,
|
40 |
+
n_ctx=int(os.getenv("N_CTX", "1024")),
|
41 |
n_threads=os.cpu_count() or 4,
|
42 |
+
n_batch=int(os.getenv("N_BATCH", "32")),
|
43 |
chat_format=CHAT_FORMAT,
|
44 |
)
|
45 |
|
|
|
57 |
if request.user_id not in chatbots:
|
58 |
chatbots[request.user_id] = UofTearsBot(llm)
|
59 |
current_bot = chatbots[request.user_id]
|
60 |
+
print("[INFO] Model is generating response...", flush=True)
|
61 |
response = current_bot.converse(request.user_text)
|
62 |
return JSONResponse(content={"response": response, "history": current_bot.history})
|
63 |
except Exception as e:
|