Update app.py
app.py
CHANGED
@@ -3,16 +3,21 @@ from pydantic import BaseModel
 from transformers import AutoTokenizer, AutoModelForCausalLM
 import torch
 
-# Initialize the Qwen model and tokenizer
+# Initialize the Qwen model and tokenizer (with trust_remote_code)
+model_id = "Qwen/Qwen-1_8B-Chat"
+device = "cuda" if torch.cuda.is_available() else "cpu"
+
+print(f"🚀 Loading model: {model_id} on {device}")
+
 tokenizer = AutoTokenizer.from_pretrained(
-
+    model_id,
     trust_remote_code=True
 )
 model = AutoModelForCausalLM.from_pretrained(
-
+    model_id,
     trust_remote_code=True,
     torch_dtype=torch.float32
-).to(
+).to(device)
 
 # Create the FastAPI app
 app = FastAPI()
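The request schema and the conversation store sit outside this diff, but the handler below depends on them. A minimal sketch of what they presumably look like, inferred only from the `class Prompt(BaseModel):` context in the next hunk header and from the names the handler reads (`prompt.text`, `prompt.reset`, `chat_history`); the actual definitions in app.py may differ:

from fastapi import FastAPI
from pydantic import BaseModel

class Prompt(BaseModel):
    text: str            # the user's message
    reset: bool = False  # when true, the handler clears the history

# global in-memory conversation state shared across requests
chat_history = []

Because chat_history is a single module-level list, every client shares the same conversation until one of them sends reset.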
@@ -26,8 +31,11 @@ class Prompt(BaseModel):
 @app.post("/chat")
 async def chat(prompt: Prompt):
     global chat_history
+
+    print(f"\n📝 User input: {prompt.text}")
     if prompt.reset:
         chat_history = []
+        print("🔄 Chat history reset")
 
     chat_history.append({"role": "user", "content": prompt.text})
 
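The loop that turns chat_history into the ChatML prompt falls between these hunks and is not shown. Judging from the `chatml += "<|im_start|>assistant\n"` line in the next hunk and the role/content dicts stored above, it presumably follows the ChatML layout Qwen chat models expect; a rough sketch, not the literal code in app.py:

# build the prompt from the stored turns in ChatML form
chatml = ""
for msg in chat_history:
    chatml += f"<|im_start|>{msg['role']}\n{msg['content']}<|im_end|>\n"
# the next hunk then appends "<|im_start|>assistant\n" so generation
# continues as the assistant turn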
@@ -38,7 +46,7 @@ async def chat(prompt: Prompt):
     chatml += "<|im_start|>assistant\n"
 
     try:
-        inputs = tokenizer(chatml, return_tensors="pt").to(
+        inputs = tokenizer(chatml, return_tensors="pt").to(device)
         outputs = model.generate(
             **inputs,
             max_new_tokens=512,
@@ -46,11 +54,24 @@ async def chat(prompt: Prompt):
             temperature=0.7,
             top_p=0.9
         )
-        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-
+        response = tokenizer.decode(outputs[0], skip_special_tokens=True).strip()
+
+        print("🧠 Raw model output:", response)
+
+        # Extract the assistant reply
+        if "<|im_start|>assistant\n" in response:
+            reply = response.split("<|im_end|>")[0].split("<|im_start|>assistant\n")[-1].strip()
+        else:
+            reply = response  # fallback
+
+        if not reply:
+            reply = "⚠️ The model produced no reply, please try again later."
+            print("⚠️ Reply was an empty string")
 
         chat_history.append({"role": "assistant", "content": reply})
+        print("✅ Final reply:", reply)
         return {"reply": reply}
+
     except Exception as e:
         print("❌ Model response error:", e)
         return {"reply": "Could not get a model reply right now, please try again later."}
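Because `tokenizer.decode(outputs[0], ...)` returns the prompt plus the completion, the new code isolates the assistant turn by splitting on the ChatML markers. A common alternative (not what this commit does) is to decode only the tokens generated after the prompt, which sidesteps the string surgery; a sketch using the same variable names:

# decode only what the model appended after the prompt
prompt_len = inputs["input_ids"].shape[-1]
reply = tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True).strip()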
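For reference, a minimal client call against the /chat endpoint defined in this file. The field names (`text`, `reset`) and the `reply` key come from the handler above; the base URL is a placeholder for wherever the Space, or a local `uvicorn app:app` instance, is reachable, and the requests package is assumed to be installed:

import requests

BASE_URL = "http://localhost:8000"  # placeholder, replace with the actual server URL

# start a fresh conversation
r = requests.post(f"{BASE_URL}/chat", json={"text": "Hello!", "reset": True})
print(r.json()["reply"])

# follow-up turn that reuses the server-side chat_history
r = requests.post(f"{BASE_URL}/chat", json={"text": "Tell me more.", "reset": False})
print(r.json()["reply"])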