import streamlit as st
from mlx_lm import load, generate

# Load the model and tokenizer from the Hugging Face Hub
model, tokenizer = load("Rafii/f1llama")

st.title("Your F1 Bro")

# User input
user_input = st.text_input("Enter text:")

if st.button("Submit"):
    prompt = user_input
    # If the tokenizer ships a chat template, wrap the input as a user message
    # so the model sees the format it was fine-tuned on
    if hasattr(tokenizer, "apply_chat_template") and tokenizer.chat_template is not None:
        messages = [{"role": "user", "content": prompt}]
        prompt = tokenizer.apply_chat_template(
            messages, tokenize=False, add_generation_prompt=True
        )
    # Generate a completion and render it in the app
    response = generate(model, tokenizer, prompt=prompt, verbose=True)
    st.write(response)