Spaces:

schuler
/

experimental-KPhi-3-nano-4k-instruct

Sleeping

App Files Files Community

experimental-KPhi-3-nano-4k-instruct / app.py

schuler

Update app.py

8ee116f verified 4 months ago

raw

history blame contribute delete

9.07 kB

	# adapted from:
	# https://medium.com/@james.irving.phd/creating-your-personal-chatbot-using-hugging-face-spaces-and-streamlit-596a54b9e3ed

	import os
	from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig, pipeline
	from transformers import LlamaTokenizer
	import streamlit as st
	import torch

	# Repository id handed to load_model() for the tokenizer, generation
	# config and model weights.
	REPO_NAME = 'schuler/experimental-JP47D20'
	# Alternative checkpoint:
	# REPO_NAME = 'schuler/experimental-JP47D21-KPhi-3-micro-4k-instruct'

	# The browser-tab title and the on-page heading use the same text.
	_PAGE_TITLE = "Experimental KPhi3 Model - Currently in Training"
	st.set_page_config(page_title=_PAGE_TITLE, page_icon="🤗")
	st.title(_PAGE_TITLE)

	# Load tokenizer and model
	@st.cache_resource(show_spinner="Loading model...")
	def load_model(local_repo_name):
	    """Load the tokenizer, generation config and causal LM of a repository.

	    The function is decorated with ``st.cache_resource`` and shows a
	    "Loading model..." spinner while it runs.

	    Args:
	        local_repo_name: Repository id or path passed to every
	            ``from_pretrained`` call below.

	    Returns:
	        tuple: ``(tokenizer, generation_config, model)``.
	    """
	    # LlamaTokenizer is requested explicitly instead of going through
	    # AutoTokenizer.
	    tok = LlamaTokenizer.from_pretrained(local_repo_name, trust_remote_code=True)
	    gen_conf = GenerationConfig.from_pretrained(local_repo_name)
	    # The weights are loaded as bfloat16.
	    lm = AutoModelForCausalLM.from_pretrained(
	        local_repo_name,
	        trust_remote_code=True,
	        torch_dtype=torch.bfloat16,
	    )
	    return tok, gen_conf, lm

	tokenizer, generator_conf, model = load_model(REPO_NAME)

	# Parameter statistics shown in the page header.
	# total_params is only consumed by the commented-out debug output below.
	total_params = sum(p.numel() for p in model.parameters())
	trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
	# The embedding table is counted twice.
	# NOTE(review): presumably to also cover an output projection of the same
	# size -- confirm against the model definition.
	embed_params = sum(p.numel() for p in model.model.embed_tokens.parameters()) * 2
	non_embed_params = (trainable_params - embed_params) / 1e6

	# Each st.markdown call is rendered as its own element, so the emphasis
	# markers must be balanced inside every call: a "*" opened in one call and
	# closed in a later one is displayed as a literal asterisk. The closing "*"
	# must also directly follow the text (no trailing space) to be recognised.
	st.markdown(f"*This chat uses the {REPO_NAME} model with {model.get_memory_footprint() / 1e6:.2f} MB memory footprint.*")

	# Debug output, disabled:
	# st.markdown(f"Total number of parameters: {total_params}. ")
	# st.markdown(f"Total number of trainable parameters: {trainable_params}. ")
	# st.markdown(f"Total number of embed parameters: {embed_params}. ")

	st.markdown(f"*Total number of non embedding trainable parameters: {non_embed_params:.2f} million.*")
	st.markdown("*You may ask questions such as 'What is biology?' or 'What is the human body?'*")

	# Build the text-generation pipeline around the already loaded model and
	# tokenizer.
	try:
	    generator = pipeline("text-generation", model=model, tokenizer=tokenizer)
	except Exception as e:
	    st.error(f"Failed to load model: {str(e)}")
	    # Without a pipeline the name `generator` is never bound and the first
	    # chat request would fail with a NameError; end this script run here
	    # so only the error message above is shown.
	    st.stop()

	# Default value for every session-state entry the app relies on. An entry
	# is written only when it is missing, so values already stored in the
	# session are left untouched.
	_session_defaults = {
	    # Avatars shown next to chat messages, keyed by role
	    "avatars": {'user': None, 'assistant': None},
	    # Text most recently submitted through the chat input
	    "user_text": None,
	    # Passed to get_response() as max_new_tokens
	    "max_response_length": 64,
	    "system_message": "",
	    "starter_message": "Hello, there! How can I help you today?",
	    # Gates whether the "Continue" button is offered
	    "can_continue": False,
	    # Full text returned by the last generation
	    "last_response": '',
	}
	for _state_key, _state_default in _session_defaults.items():
	    if _state_key not in st.session_state:
	        st.session_state[_state_key] = _state_default

	# Flag for the continue action; starts out False on every script run
	need_continue = False

	# Sidebar for settings. Every widget is seeded from session state and its
	# current value is written straight back.
	with st.sidebar:
	    st.header("System Settings")

	    # AI Settings
	    st.session_state.system_message = st.text_area(
	        "System Message", value=st.session_state.system_message
	    )
	    st.session_state.starter_message = st.text_area(
	        'First AI Message', value=st.session_state.starter_message
	    )

	    # Model Settings. The value is forwarded to generation as
	    # max_new_tokens, so it is restricted to whole numbers >= 1; without a
	    # lower bound the widget also accepts zero and negative values.
	    st.session_state.max_response_length = st.number_input(
	        "Max Response Length",
	        min_value=1,
	        step=1,
	        value=st.session_state.max_response_length,
	    )

	    # Avatar Selection: two drop-downs side by side
	    st.markdown("Select Avatars:")
	    col1, col2 = st.columns(2)
	    with col1:
	        st.session_state.avatars['assistant'] = st.selectbox(
	            "AI Avatar", options=["🤗", "💬", "🤖"], index=0
	        )
	    with col2:
	        st.session_state.avatars['user'] = st.selectbox(
	            "User Avatar", options=["👤", "👱‍♂️", "👨🏾", "👩", "👧🏾"], index=0
	        )
	    # Reset Chat History: True only on the run triggered by the click
	    reset_history = st.button("Reset Chat History")

	# Initialize or reset chat history
	if "chat_history" not in st.session_state or reset_history:
	    # The history starts empty; the starter message is not inserted.
	    st.session_state.chat_history = []
	    # With an empty history there is nothing to continue. Leaving
	    # can_continue set would keep the "Continue" button visible, and
	    # pressing it would index the last entry of an empty list.
	    st.session_state.can_continue = False
	    st.session_state.last_response = ''

	def _build_prompt(system_message, chat_history, user_text):
	    """Render the conversation as one prompt string ending in an open assistant turn."""
	    parts = []
	    if system_message:
	        # NOTE(review): the system message is emitted under the assistant
	        # tag, exactly as before -- confirm this is what the model expects.
	        parts.append(f"<|assistant|>{system_message}<|end|>")
	    for message in chat_history:
	        role = "<|assistant|>" if message['role'] == 'assistant' else "<|user|>"
	        parts.append(f"{role}{message['content']}<|end|>")
	    parts.append(f"<|user|>{user_text}<|end|><|assistant|>")
	    return "".join(parts)

	def get_response(system_message, chat_history, user_text, max_new_tokens=256, continue_last=False):
	    """
	    Generates a response from the chatbot model.

	    In normal mode a prompt is built from the system message, the chat
	    history and the new user text. In continue mode the full text of the
	    previous generation (``st.session_state.last_response``) is used as the
	    prompt and the newly generated text is appended to the last history
	    entry. The full generated text is stored back into
	    ``st.session_state.last_response`` either way.

	    Args:
	        system_message (str): The system message for the conversation.
	        chat_history (list): The list of previous chat messages (dicts with
	            'role' and 'content'). It is modified in place.
	        user_text (str): The user's input text. Unused when continuing.
	        max_new_tokens (int): The maximum number of new tokens to generate.
	        continue_last (bool): Whether to continue the last assistant response.

	    Returns:
	        tuple: A tuple containing the generated response and the updated chat history.
	        When continuing with an empty history, ``("", chat_history)`` is
	        returned without running the model.
	    """
	    if continue_last:
	        if not chat_history:
	            # Nothing to extend (e.g. the history was reset); indexing the
	            # last entry below would raise IndexError.
	            return "", chat_history
	        # We want to continue the last assistant response
	        prompt = st.session_state.last_response
	    else:
	        # Build the conversation prompt
	        prompt = _build_prompt(system_message, chat_history, user_text)

	    # Generate the response
	    response_output = generator(
	        prompt,
	        generation_config=generator_conf,
	        max_new_tokens=max_new_tokens,
	        do_sample=True,
	        top_p=0.25,
	        repetition_penalty=1.2,
	    )

	    generated_text = response_output[0]['generated_text']

	    # Kept so that a later "continue" request can pick up from here
	    st.session_state.last_response = generated_text

	    # Extract the assistant's response.
	    # Assumes generated_text starts with the prompt (the pipeline returning
	    # the full text) -- TODO confirm for the installed transformers version.
	    assistant_response = generated_text[len(prompt):]

	    if continue_last:
	        # Append the continued text to the last message of the history that
	        # was passed in (and is returned), not to a separately looked-up list
	        chat_history[-1]['content'] += assistant_response
	    else:
	        # Update the chat history
	        chat_history.append({'role': 'user', 'content': user_text})
	        chat_history.append({'role': 'assistant', 'content': assistant_response})

	    return assistant_response, chat_history

	# Container that holds the rendered conversation
	chat_interface = st.container()
	def refresh_chat():
	    """Draw every non-system message of the stored chat history.

	    Messages are rendered inside a fresh container placed in
	    ``chat_interface``, each with the avatar configured for its role.
	    """
	    with chat_interface:
	        history_box = st.container()

	        with history_box:
	            for entry in st.session_state.chat_history:
	                role = entry['role']
	                # System entries are never shown
	                if role != 'system':
	                    with st.chat_message(role, avatar=st.session_state.avatars[role]):
	                        st.markdown(entry['content'])

	refresh_chat()

	# Chat input at the bottom of the page; its value is mirrored into
	# session state.
	st.session_state.user_text = st.chat_input(placeholder="Enter your text here.")

	# A submitted message triggers one generation round
	if st.session_state.user_text:
	    _avatars = st.session_state.avatars

	    # Echo the user's message
	    with st.chat_message("user", avatar=_avatars['user']):
	        st.markdown(st.session_state.user_text)

	    # Show a spinner in the assistant bubble while the model runs
	    with st.chat_message("assistant", avatar=_avatars['assistant']):
	        with st.spinner("Thinking..."):
	            response, _updated_history = get_response(
	                system_message=st.session_state.system_message,
	                chat_history=st.session_state.chat_history,
	                user_text=st.session_state.user_text,
	                max_new_tokens=st.session_state.max_response_length,
	                continue_last=False,
	            )
	            st.session_state.chat_history = _updated_history
	            st.markdown(response)

	    # A response now exists, so the "Continue" button may be offered
	    st.session_state.can_continue = True

	    # Clear the user input
	    st.session_state.user_text = None

	# The "Continue" button is only created once a response exists; the
	# short-circuit keeps st.button from being called otherwise.
	need_continue = st.session_state.can_continue and st.button("Continue")

	# If "Continue" button was pressed
	if need_continue:
	    # Show a spinner in an assistant bubble while the continuation is generated
	    with st.chat_message("assistant", avatar=st.session_state.avatars['assistant']), st.spinner("Continuing..."):
	        # Extend the assistant's last response; no new user text is involved
	        response, st.session_state.chat_history = get_response(
	            system_message=st.session_state.system_message,
	            user_text=None,
	            chat_history=st.session_state.chat_history,
	            max_new_tokens=st.session_state.max_response_length,
	            continue_last=True,
	        )
	        st.markdown(response)
	    # Re-run the script so the history is redrawn with the extended message
	    st.rerun()