import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
import gradio as gr

# Load tokenizer and model from Hugging Face.
model_id = "MindVR/JohnTran_Fine-tune"
tokenizer = AutoTokenizer.from_pretrained(model_id)

# Pick the device at runtime instead of hardcoding "cuda", which raises a
# RuntimeError on CPU-only machines. float16 is only safe/beneficial on GPU,
# so fall back to float32 on CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
dtype = torch.float16 if device == "cuda" else torch.float32
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=dtype).to(device)
model.eval()  # inference only — disables dropout etc.


def chat(input_text, history=None):
    """Generate a single model reply for *input_text*.

    Args:
        input_text: The user's prompt as a plain string.
        history: Accepted for chat-interface compatibility. NOTE(review):
            it is never read — the prompt contains only *input_text*, so the
            bot has no conversational memory. Default changed from the
            mutable ``[]`` (shared across calls) to ``None``.

    Returns:
        The decoded generation (includes the prompt, since the full
        ``output[0]`` sequence is decoded — unchanged from the original).
    """
    input_ids = tokenizer.encode(input_text, return_tensors="pt").to(device)
    with torch.no_grad():  # no gradients needed for generation
        output = model.generate(
            input_ids,
            max_new_tokens=200,
            do_sample=True,
            top_p=0.95,
            temperature=0.7,
        )
    reply = tokenizer.decode(output[0], skip_special_tokens=True)
    return reply


# Gradio interface
iface = gr.Interface(
    fn=chat,
    inputs=gr.Textbox(lines=2, placeholder="Nhập câu hỏi..."),
    outputs="text",
    title="AI Therapy Chatbot",
)

# Guard the launch so importing this module doesn't start a web server.
if __name__ == "__main__":
    iface.launch()