import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Load the Granite tokenizer and model once at startup
model_name = "ibm-granite/granite-4.0-tiny-preview"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# Chat function: builds the prompt from the running history, generates a
# reply, and appends both turns as openai-style role/content dicts
def chatbot(user_message, history):
    messages = history + [{"role": "user", "content": user_message}]
    inputs = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        tokenize=True,
        return_tensors="pt",
        return_dict=True,
    ).to(model.device)
    with torch.inference_mode():  # no gradients needed for generation
        outputs = model.generate(**inputs, max_new_tokens=200)
    # Decode only the newly generated tokens, skipping the prompt
    response = tokenizer.decode(
        outputs[0][inputs["input_ids"].shape[-1]:],
        skip_special_tokens=True,
    )
    history.append({"role": "user", "content": user_message})
    history.append({"role": "assistant", "content": response})
    return history

# Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("# 🤖 Granite-4.0 Tiny Chatbot")
    # type="messages" lets the Chatbot render the role/content dicts we store
    chatbot_ui = gr.Chatbot(type="messages")
    msg = gr.Textbox(placeholder="Type your message here...")
    clear = gr.Button("Clear")
    state = gr.State([])

    def user_input(message, history):
        history = chatbot(message, history)
        # Update the chat display and state, and clear the input textbox
        return history, history, ""

    msg.submit(user_input, [msg, state], [chatbot_ui, state, msg])
    clear.click(lambda: ([], []), None, [chatbot_ui, state])

demo.launch()
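
# --- Alternative (hedged sketch): Gradio's built-in ChatInterface ---
# If you only need a standard chat UI, gr.ChatInterface can replace the
# manual Blocks/State wiring above: it manages the history and textbox
# itself, so the handler just returns the assistant's reply as a string.
# This assumes a recent Gradio release that supports type="messages"
# (openai-style role/content dicts); treat it as a sketch, not a drop-in
# replacement. Uncomment to use instead of the Blocks UI above.
#
# def respond(message, history):
#     messages = history + [{"role": "user", "content": message}]
#     inputs = tokenizer.apply_chat_template(
#         messages, add_generation_prompt=True, tokenize=True,
#         return_tensors="pt", return_dict=True,
#     ).to(model.device)
#     with torch.inference_mode():
#         outputs = model.generate(**inputs, max_new_tokens=200)
#     return tokenizer.decode(
#         outputs[0][inputs["input_ids"].shape[-1]:], skip_special_tokens=True
#     )
#
# gr.ChatInterface(respond, type="messages",
#                  title="🤖 Granite-4.0 Tiny Chatbot").launch()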