Spaces:

42Cummer
/

UofTearsBotAPI

Paused

42Cummer committed 14 days ago

Commit

e556488

verified ·

1 Parent(s): 6e75af9

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -37,9 +37,9 @@ MODEL_PATH = hf_hub_download(
 llm = Llama(
     model_path=MODEL_PATH,
-    n_ctx=int(os.getenv("N_CTX", "4096")),
     n_threads=os.cpu_count() or 4,
-    n_batch=int(os.getenv("N_BATCH", "256")),
     chat_format=CHAT_FORMAT,
 )
@@ -57,6 +57,7 @@ async def chat(request: ChatRequest):
         if request.user_id not in chatbots:
             chatbots[request.user_id] = UofTearsBot(llm)
         current_bot = chatbots[request.user_id]
         response = current_bot.converse(request.user_text)
         return JSONResponse(content={"response": response, "history": current_bot.history})
     except Exception as e:

 llm = Llama(
     model_path=MODEL_PATH,
+    n_ctx=int(os.getenv("N_CTX", "1024")),
     n_threads=os.cpu_count() or 4,
+    n_batch=int(os.getenv("N_BATCH", "32")),
     chat_format=CHAT_FORMAT,
 )
         if request.user_id not in chatbots:
             chatbots[request.user_id] = UofTearsBot(llm)
         current_bot = chatbots[request.user_id]
+        print("[INFO] Model is generating response...", flush=True)
         response = current_bot.converse(request.user_text)
         return JSONResponse(content={"response": response, "history": current_bot.history})
     except Exception as e: