import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM

# Load the model and tokenizer directly from the Hugging Face Hub
tokenizer = AutoTokenizer.from_pretrained("mailboxlab11/llama-3-medicalassistant")
model = AutoModelForCausalLM.from_pretrained("mailboxlab11/llama-3-medicalassistant")

def generate(prompt):
    # Tokenize the user's prompt
    inputs = tokenizer(prompt, return_tensors="pt")
    # Generate a response; cap new tokens so generation terminates promptly
    outputs = model.generate(**inputs, max_new_tokens=256)
    # Decode only the newly generated tokens, skipping the echoed prompt
    response = tokenizer.decode(
        outputs[0][inputs["input_ids"].shape[-1]:], skip_special_tokens=True
    )
    return response

# Create the Gradio interface
iface = gr.Interface(
    fn=generate,
    inputs="text",
    outputs="text",
    title="Medical Assistant",
    description="Ask me any medical question.",
)

# Launch the Gradio interface
iface.launch()
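
# Usage (a sketch, assuming this script is saved as app.py):
#   $ python app.py
# Gradio serves the interface at a local URL (http://127.0.0.1:7860 by
# default) with a text box for prompts. Pass share=True to launch() to
# get a temporary public link for testing.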