import os

import streamlit as st
import torch
from transformers import pipeline

# Hugging Face access token (required for gated models such as Gemma)
token = os.environ.get("HF_TOKEN")

model_id = "google/gemma-2-2b-it"
welcome_message = "Hello there 👋! Is there anything I can help you with?"


@st.cache_resource  # load the pipeline once and reuse it across Streamlit reruns
def model_setup(model_id):
    pipe = pipeline(
        "text-generation",
        model=model_id,
        torch_dtype=torch.bfloat16,
        device_map="cpu",
        token=token,
    )
    return pipe


def run_model(prompt):
    # Gemma's chat template has no system role, so the instruction is
    # prepended to the user message instead.
    messages = [
        {
            "role": "user",
            "content": "You are a helpful assistant who politely answers "
            "the user's questions. " + prompt,
        }
    ]
    outputs = pipe(
        messages,
        max_new_tokens=4096,
    )
    # The pipeline returns the whole conversation; the last message is the reply.
    return outputs[0]["generated_text"][-1]["content"]


### load model
pipe = model_setup(model_id)

### initialize chat history (only on the first run of the session,
### so the welcome message is not duplicated on every rerun)
if "messages" not in st.session_state:
    st.session_state.messages = []
    st.session_state.messages.append({"role": "assistant", "content": welcome_message})

### display chat messages from history on app rerun
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])

### accept user input
if prompt := st.chat_input("Type here!", key="question"):
    # display user message in chat message container
    with st.chat_message("user"):
        st.markdown(prompt)
    # add user message to chat history
    st.session_state.messages.append({"role": "user", "content": prompt})
    # run model
    response = run_model(prompt)
    # display assistant response in chat message container
    with st.chat_message("assistant"):
        st.markdown(response)
    # add assistant response to chat history
    st.session_state.messages.append({"role": "assistant", "content": response})
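
### usage (a minimal sketch, assuming this script is saved as app.py and
### that your Hugging Face token has been granted access to the Gemma model):
#
#   export HF_TOKEN=<your Hugging Face access token>
#   streamlit run app.py
#
# Streamlit then serves the chat UI locally (by default at http://localhost:8501).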