"""Gradio front-end for a causal-LM chatbot.

Loads the ``MindVR/JohnTran_Fine-tune`` checkpoint from the Hugging Face Hub
and exposes a two-field interface: the prior conversation (as a list of
turn strings) and the new user message. The model's reply is returned as text.
"""
import ast
import json
import os

import gradio as gr
import torch
from huggingface_hub import login
from transformers import AutoModelForCausalLM, AutoTokenizer

# Optional Hub token; when present it is used both for login and for the
# tokenizer/model downloads below.
HF_TOKEN = os.environ.get("HF_TOKEN")
if HF_TOKEN:
    login(token=HF_TOKEN)

model_id = "MindVR/JohnTran_Fine-tune"
tokenizer = AutoTokenizer.from_pretrained(model_id, token=HF_TOKEN)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    low_cpu_mem_usage=True,
    token=HF_TOKEN,
)

# Kept as a module-level name for backward compatibility.
# NOTE: the model is deliberately NOT moved with ``model.to(device)``:
# ``device_map="auto"`` already lets Accelerate place the weights, and
# Accelerate warns against (and, when weights are offloaded, rejects) moving
# a dispatched model. Inputs are sent to ``model.device`` instead.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Errors ``json.loads`` / ``ast.literal_eval`` can raise on malformed text.
_PARSE_ERRORS = (ValueError, SyntaxError, TypeError, MemoryError, RecursionError)


def build_prompt(history, new_message):
    """Return the text prompt fed to the model.

    Args:
        history: Sequence of prior turn strings (may be empty or ``None``).
        new_message: The user's new message.

    Returns:
        The prior turns joined by newlines, followed by
        ``"User: <new_message>\\nAI:"``.
    """
    prompt = ""
    if history:
        prompt += "\n".join(history) + "\n"
    prompt += f"User: {new_message}\nAI:"
    return prompt


def _parse_history(history):
    """Coerce the raw history value into a list of strings.

    The UI delivers the history as free text, so it may be empty, a JSON list,
    a Python-literal list, or plain prose. Text that cannot be parsed as a
    list is treated as a single history entry rather than raising.
    """
    if history is None:
        return []
    if isinstance(history, (list, tuple)):
        return [str(turn) for turn in history]
    if not isinstance(history, str):
        return [str(history)]

    text = history.strip()
    if not text:
        # An empty textbox means "no history", not one empty turn.
        return []

    # The label asks for JSON, but Python-literal syntax (single quotes) is
    # accepted as well; both parsers are safe on untrusted text.
    for parser in (json.loads, ast.literal_eval):
        try:
            parsed = parser(text)
        except _PARSE_ERRORS:
            continue
        if isinstance(parsed, (list, tuple)):
            return [str(turn) for turn in parsed]
        if isinstance(parsed, str):
            return [parsed]
        # Parsed to a number/dict/etc.: not a usable turn list.
        break
    return [text]


def _extract_reply(generated_text):
    """Return the assistant's reply from the newly generated text.

    Strips an echoed ``AI:`` prefix and cuts the text at the first
    ``User:`` line so that turns the model invents on the user's behalf are
    not returned.
    """
    reply = generated_text.strip()
    if reply.startswith("AI:"):
        reply = reply[len("AI:"):]
    cut = reply.find("\nUser:")
    if cut != -1:
        reply = reply[:cut]
    return reply.strip()


def chat(history, new_message):
    """Generate the model's reply to ``new_message`` given ``history``.

    Args:
        history: Prior turns, either as a list of strings or as the text of a
            JSON / Python-literal list (as typed into the UI textbox).
        new_message: The user's new message.

    Returns:
        The generated reply as a stripped string.
    """
    turns = _parse_history(history)
    prompt = build_prompt(turns, new_message)

    encoded = tokenizer(prompt, return_tensors="pt").to(model.device)
    input_ids = encoded["input_ids"]
    with torch.no_grad():
        output = model.generate(
            input_ids,
            # pad_token_id is set to EOS below, so the mask is passed
            # explicitly rather than inferred from padding.
            attention_mask=encoded.get("attention_mask"),
            max_new_tokens=200,
            do_sample=True,
            top_p=0.95,
            temperature=0.7,
            pad_token_id=tokenizer.eos_token_id,
        )

    # generate() returns prompt + continuation; decode only the continuation
    # so "AI:" markers inside the prompt or user text cannot affect parsing.
    new_tokens = output[0][input_ids.shape[-1]:]
    generated_text = tokenizer.decode(new_tokens, skip_special_tokens=True)
    return _extract_reply(generated_text)


iface = gr.Interface(
    fn=chat,
    inputs=[
        gr.Textbox(lines=8, label="History (JSON list, ví dụ: [\"User: Xin chào\"] )"),
        gr.Textbox(label="New message"),
    ],
    outputs=gr.Textbox(label="AI Response"),
    title="MindVR Therapy Chatbot",
    allow_flagging="never",
)

iface.launch()