# Hugging Face Space source file: app.py
# Latest commit: "Update app.py" (8ee116f, verified), author: schuler
# adapted from:
# https://medium.com/@james.irving.phd/creating-your-personal-chatbot-using-hugging-face-spaces-and-streamlit-596a54b9e3ed
import os
from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig, pipeline
from transformers import LlamaTokenizer
import streamlit as st
import torch
# Define the model repository (Hugging Face Hub id the tokenizer, generation
# config and weights are loaded from).
REPO_NAME = 'schuler/experimental-JP47D20'
# REPO_NAME = 'schuler/experimental-JP47D21-KPhi-3-micro-4k-instruct'

# Configure the Streamlit app. The page icon must be a real emoji; the
# previously mis-encoded byte sequence has been restored to the intended
# hugging-face emoji.
st.set_page_config(
    page_title="Experimental KPhi3 Model - Currently in Training",
    page_icon="🤗",
)
st.title("Experimental KPhi3 Model - Currently in Training")
# Load tokenizer and model
@st.cache_resource(show_spinner="Loading model...")
def load_model(local_repo_name):
    """
    Load the tokenizer, generation config and causal LM for a repository.

    The result is wrapped in ``st.cache_resource`` so the loaders are not
    re-run for the same ``local_repo_name``.

    Args:
        local_repo_name (str): Repository name handed to each
            ``from_pretrained`` call.
    Returns:
        tuple: ``(tokenizer, generation_config, model)``.
    """
    # trust_remote_code=True executes code shipped with the repository.
    tok = LlamaTokenizer.from_pretrained(
        local_repo_name,
        trust_remote_code=True,
    )
    gen_conf = GenerationConfig.from_pretrained(local_repo_name)
    causal_lm = AutoModelForCausalLM.from_pretrained(
        local_repo_name,
        trust_remote_code=True,
        torch_dtype=torch.bfloat16,
    )
    return tok, gen_conf, causal_lm
tokenizer, generator_conf, model = load_model(REPO_NAME)

# Parameter statistics shown in the page header.
total_params = sum(p.numel() for p in model.parameters())
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
# NOTE(review): the factor 2 presumably accounts for the input embedding plus
# an equally sized output projection - confirm against the model definition.
embed_params = sum(p.numel() for p in model.model.embed_tokens.parameters()) * 2
non_embed_params = (trainable_params - embed_params) / 1e6

# Each st.markdown call renders its own element, so emphasis markers must open
# and close inside the same call (and the closing '*' must directly follow
# text, not a space) for the text to be italicised.
st.markdown(
    f"*This chat uses the {REPO_NAME} model with "
    f"{model.get_memory_footprint() / 1e6:.2f} MB memory footprint.*"
)
# st.markdown(f"Total number of parameters: {total_params}. ")
# st.markdown(f"Total number of trainable parameters: {trainable_params}. ")
# st.markdown(f"Total number of embed parameters: {embed_params}. ")
st.markdown(
    f"*Total number of non embedding trainable parameters: {non_embed_params:.2f} million.*"
)
st.markdown("*You may ask questions such as 'What is biology?' or 'What is the human body?'*")
# Build the text-generation pipeline used by get_response().
try:
    generator = pipeline("text-generation", model=model, tokenizer=tokenizer)
except Exception as e:
    st.error(f"Failed to load model: {e}")
    # Without a pipeline `generator` is undefined and every later use would
    # raise NameError, so halt this script run after reporting the error.
    st.stop()
# Default values for every session-state entry the app relies on:
# avatars, the pending user input, the model/UI settings, the flag that
# enables the "Continue" button, and the last full generated text.
_SESSION_DEFAULTS = {
    "avatars": {'user': None, 'assistant': None},
    "user_text": None,
    "max_response_length": 64,
    "system_message": "",
    "starter_message": "Hello, there! How can I help you today?",
    "can_continue": False,
    "last_response": '',
}

# Only fill in entries that are missing so values from earlier reruns survive.
for _state_key, _state_default in _SESSION_DEFAULTS.items():
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _state_default

# Initialize state for continue action (recomputed on every script run)
need_continue = False
# Sidebar for settings: each widget is seeded from session state and its
# current value is written straight back.
with st.sidebar:
    st.header("System Settings")

    # AI Settings
    st.session_state.system_message = st.text_area(
        "System Message", value=st.session_state.system_message
    )
    st.session_state.starter_message = st.text_area(
        'First AI Message', value=st.session_state.starter_message
    )

    # Model Settings
    # The value is passed to the generator as max_new_tokens, so it must be
    # a positive integer; min_value/step keep the widget in that range.
    st.session_state.max_response_length = st.number_input(
        "Max Response Length",
        min_value=1,
        value=st.session_state.max_response_length,
        step=1,
    )

    # Avatar Selection (emoji literals restored from mis-encoded bytes;
    # they are later passed as the `avatar` of st.chat_message)
    st.markdown("*Select Avatars:*")
    col1, col2 = st.columns(2)
    with col1:
        st.session_state.avatars['assistant'] = st.selectbox(
            "AI Avatar", options=["🤗", "💬", "🤖"], index=0
        )
    with col2:
        st.session_state.avatars['user'] = st.selectbox(
            "User Avatar", options=["👤", "👱‍♂️", "👨🏾", "👩", "👧🏾"], index=0
        )

    # Reset Chat History
    reset_history = st.button("Reset Chat History")
# Initialize or reset chat history
if "chat_history" not in st.session_state or reset_history:
    # The starter message is intentionally not seeded into the history:
    # [{"role": "assistant", "content": st.session_state.starter_message}]
    st.session_state.chat_history = []
    # Clear the continuation state together with the history. Otherwise the
    # "Continue" button stays visible after a reset and pressing it tries to
    # extend the last message of an empty history.
    st.session_state.can_continue = False
    st.session_state.last_response = ''
def _build_prompt(system_message, chat_history, user_text):
    """Assemble the tagged conversation prompt ending with an open assistant turn."""
    parts = []
    if system_message:
        # NOTE(review): the system message is emitted under the <|assistant|>
        # tag, exactly as before - confirm this matches the model's training
        # format before switching to a dedicated system tag.
        parts.append(f"<|assistant|>{system_message}<|end|>")
    for message in chat_history:
        role_tag = "<|assistant|>" if message['role'] == 'assistant' else "<|user|>"
        parts.append(f"{role_tag}{message['content']}<|end|>")
    parts.append(f"<|user|>{user_text}<|end|><|assistant|>")
    return "".join(parts)


def get_response(system_message, chat_history, user_text, max_new_tokens=256, continue_last=False):
    """
    Generates a response from the chatbot model.

    Args:
        system_message (str): The system message for the conversation.
        chat_history (list): The list of previous chat messages
            (dicts with 'role' and 'content'). It is updated in place.
        user_text (str): The user's input text (ignored when continuing).
        max_new_tokens (int): The maximum number of new tokens to generate.
        continue_last (bool): Whether to continue the last assistant response.
    Returns:
        tuple: A tuple containing the generated response and the updated chat
        history. When ``continue_last`` is set but there is nothing to
        continue (no trailing assistant message, or no stored previous
        generation), the response is '' and the history is returned unchanged.
    """
    if continue_last:
        # Nothing to extend: avoid generating and then failing on
        # chat_history[-1] when the history is empty or ends with a user turn.
        if not chat_history or chat_history[-1]['role'] != 'assistant':
            return '', chat_history
        # Continue from the full text (prompt + answer) of the last generation.
        prompt = st.session_state.last_response
        if not prompt:
            return '', chat_history
    else:
        prompt = _build_prompt(system_message, chat_history, user_text)

    # Generate the response
    response_output = generator(
        prompt,
        generation_config=generator_conf,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        top_p=0.25,
        repetition_penalty=1.2
    )
    generated_text = response_output[0]['generated_text']
    # Remember the full text so a later "continue" call can pick up from it.
    st.session_state.last_response = generated_text

    # The pipeline output is sliced on the assumption that it starts with the
    # prompt; the remainder is the newly generated assistant text.
    assistant_response = generated_text[len(prompt):]

    if continue_last:
        # Append the continued text to the last assistant message
        chat_history[-1]['content'] += assistant_response
    else:
        # Update the chat history
        chat_history.append({'role': 'user', 'content': user_text})
        chat_history.append({'role': 'assistant', 'content': assistant_response})
    return assistant_response, chat_history
# Chat interface: container that holds the rendered conversation
chat_interface = st.container()


def refresh_chat():
    """Render every non-system message of the stored chat history.

    Messages are drawn inside a fresh container nested in ``chat_interface``,
    each with the avatar configured for its role.
    """
    with chat_interface, st.container():
        for entry in st.session_state.chat_history:
            speaker = entry['role']
            if speaker != 'system':
                with st.chat_message(speaker, avatar=st.session_state.avatars[speaker]):
                    st.markdown(entry['content'])
                # NOTE: no per-message "Continue" button is drawn here; the
                # button is rendered by the top-level script instead.
# Draw the conversation so far
refresh_chat()

# User input area (moved to the bottom)
state = st.session_state
state.user_text = st.chat_input(placeholder="Enter your text here.")

# When the user enters new text
if state.user_text:
    # Echo the user's message
    with st.chat_message("user", avatar=state.avatars['user']):
        st.markdown(state.user_text)

    # Generate and show the assistant's reply, with a spinner while waiting
    with st.chat_message("assistant", avatar=state.avatars['assistant']):
        with st.spinner("Thinking..."):
            response, state.chat_history = get_response(
                system_message=state.system_message,
                chat_history=state.chat_history,
                user_text=state.user_text,
                max_new_tokens=state.max_response_length,
                continue_last=False,
            )
        st.markdown(response)

    # A reply now exists, so it may be continued
    state.can_continue = True
    # Clear the user input
    state.user_text = None
# Offer "Continue" only while the conversation ends with an assistant reply.
# After the history has been cleared there is nothing to extend, and the
# continuation code would fail on the missing last message.
_history = st.session_state.chat_history
if not _history or _history[-1]['role'] != 'assistant':
    st.session_state.can_continue = False

# The button is only rendered (and only evaluated) when continuing is allowed.
need_continue = bool(st.session_state.can_continue and st.button("Continue"))

# If "Continue" button was pressed
if need_continue:
    # Display a spinner while generating the continuation
    with st.chat_message("assistant", avatar=st.session_state.avatars['assistant']):
        with st.spinner("Continuing..."):
            # Generate the continuation of the assistant's last response
            response, st.session_state.chat_history = get_response(
                system_message=st.session_state.system_message,
                user_text=None,
                chat_history=st.session_state.chat_history,
                max_new_tokens=st.session_state.max_response_length,
                continue_last=True
            )
        st.markdown(response)
    # Rerun so the chat is redrawn from the updated history
    st.rerun()