Spaces:

Rafii
/

f1llama

Sleeping

App Files Files Community

f1llama / app.py

Rafii

refactor: simplify model initialization and remove caching

60e1b89 2 months ago

raw

history blame

3.2 kB

	import streamlit as st
	from mlx_lm import load, generate
	from huggingface_hub import login
	import os
	from langchain.memory import ConversationBufferMemory

	@st.cache_resource
	def _load_model():
	    """Load the F1 model and its tokenizer.

	    Streamlit re-executes this script from the top on every widget
	    interaction.  Wrapping the load in ``st.cache_resource`` keeps the
	    returned ``(model, tokenizer)`` pair alive between reruns so the weights
	    are not reloaded on every button click.
	    """
	    return load("Rafii/f1llama")


	# Read from the environment; nothing in this file passes it on explicitly.
	token = os.getenv("HF_TOKEN")
	model, tokenizer = _load_model()

	# One conversation memory per browser session; created on the first run only.
	if "memory" not in st.session_state:
	    st.session_state.memory = ConversationBufferMemory(return_messages=True)

	def format_chat_history(messages):
	    """Render stored conversation turns as a plain-text transcript.

	    Two item shapes are accepted:

	    * message objects exposing ``type`` (``"human"`` / ``"ai"``) and
	      ``content`` attributes, which is what the session memory returns
	      because it is created with ``return_messages=True``;
	    * dicts carrying ``"input"`` and/or ``"output"`` keys.

	    Items of any other shape or role are skipped.

	    Args:
	        messages: Iterable of history items; ``None`` or empty is allowed.

	    Returns:
	        A string with one ``"Human: ...\\n"`` or ``"Assistant: ...\\n"`` line
	        per recognised turn, or ``""`` when there is nothing to render.
	    """
	    if not messages:
	        return ""

	    lines = []
	    for msg in messages:
	        if isinstance(msg, dict):
	            if "input" in msg:
	                lines.append(f"Human: {msg['input']}\n")
	            if "output" in msg:
	                lines.append(f"Assistant: {msg['output']}\n")
	            continue

	        # Message objects are not dicts: read the role and text by attribute.
	        role = getattr(msg, "type", None)
	        content = getattr(msg, "content", None)
	        if content is None:
	            continue
	        if role == "human":
	            lines.append(f"Human: {content}\n")
	        elif role == "ai":
	            lines.append(f"Assistant: {content}\n")
	    return "".join(lines)

	def generate_response(user_input, max_tokens=100):
	    """Generate the assistant's reply to ``user_input``.

	    The stored conversation is read from ``st.session_state.memory``,
	    rendered with ``format_chat_history`` and placed in front of the new
	    question.  If the tokenizer has a chat template, the combined text is
	    wrapped as a single user turn through ``apply_chat_template``; otherwise
	    it is passed to the model unchanged.

	    Args:
	        user_input: The question typed by the user.
	        max_tokens: Upper bound on generated tokens, forwarded to ``generate``.

	    Returns:
	        The generated text.  If anything raises, the error is shown with
	        ``st.error`` and a fixed apology string is returned instead.
	    """
	    try:
	        # Get chat history
	        chat_history = st.session_state.memory.load_memory_variables({})
	        history = chat_history.get("history", "")

	        # Create contextual prompt
	        context = format_chat_history(history)
	        full_prompt = (
	            f"Previous conversation:\n{context}\nHuman: {user_input}\nAssistant:"
	        )

	        # Guard both attributes so a tokenizer lacking either one falls back
	        # to the raw prompt instead of raising AttributeError.
	        has_template = (
	            hasattr(tokenizer, "apply_chat_template")
	            and getattr(tokenizer, "chat_template", None) is not None
	        )
	        if has_template:
	            messages = [{"role": "user", "content": full_prompt}]
	            prompt = tokenizer.apply_chat_template(
	                messages, tokenize=False, add_generation_prompt=True
	            )
	        else:
	            prompt = full_prompt

	        return generate(
	            model,
	            tokenizer,
	            prompt=prompt,
	            max_tokens=max_tokens,  # previously accepted but never forwarded
	            verbose=True,
	        )
	    except Exception as e:
	        # UI boundary: report the failure on the page rather than crash the app.
	        st.error(f"Error generating response: {str(e)}")
	        return "Sorry, I encountered an error."

	st.title("F1 Chatbot 🏎️")

	user_input = st.text_input("Ask me anything:", key="user_input")

	# Generate a reply and record the exchange when the user submits a question.
	if st.button("Send", key="send") and user_input:
	    with st.spinner("Thinking..."):
	        response = generate_response(user_input)
	    st.session_state.memory.save_context(
	        {"input": user_input},
	        {"output": response},
	    )

	# Render the stored conversation, oldest turn first.
	if "memory" in st.session_state:
	    st.write("### Conversation")
	    try:
	        chat_history = st.session_state.memory.load_memory_variables({})
	        for msg in chat_history.get("history") or []:
	            if isinstance(msg, dict):
	                if "input" in msg:
	                    st.info(f"You: {msg['input']}")
	                if "output" in msg:
	                    st.success(f"Assistant: {msg['output']}")
	                continue

	            # The memory is created with return_messages=True, so items are
	            # message objects; read the role and text by attribute.
	            role = getattr(msg, "type", None)
	            content = getattr(msg, "content", None)
	            if content is None:
	                continue
	            if role == "human":
	                st.info(f"You: {content}")
	            elif role == "ai":
	                st.success(f"Assistant: {content}")
	    except Exception as e:
	        st.error(f"Error displaying conversation: {str(e)}")