import os

import streamlit as st
from huggingface_hub import InferenceClient

# Initialize the client with the token from the environment
client = InferenceClient("Dolly135/Pen_Model", token=os.getenv("token"))


# Define the respond function
def respond(
    message: str,
    history: list,  # Conversation history as a list of {"role", "content"} dicts
    system_message: str,
    temperature: float,
    top_p: float,
    max_new_tokens: int,  # Cap on the number of generated tokens
):
    # Start with the system message, replay the history,
    # then append the current user message
    messages = [{"role": "system", "content": system_message}]
    messages.extend(history)
    messages.append({"role": "user", "content": message})

    try:
        response = ""
        # chat_completion (not text_generation) is the InferenceClient method
        # that accepts a messages list; with stream=True it yields chunks with
        # OpenAI-style deltas at chunk.choices[0].delta.content
        for chunk in client.chat_completion(
            messages,
            stream=True,
            temperature=temperature,
            top_p=top_p,
            max_tokens=max_new_tokens,  # chat_completion names this max_tokens
        ):
            token = chunk.choices[0].delta.content or ""  # delta may be None
            response += token
            yield response
    except Exception as e:
        yield f"An error occurred: {e}"


# Streamlit setup
def main():
    # Sidebar for model settings
    st.sidebar.title("Model Settings")
    system_message = st.sidebar.text_input("System message", "You are Pen.")
    max_new_tokens = st.sidebar.slider("Max new tokens", 1, 2048, 512)
    temperature = st.sidebar.slider("Temperature", 0.1, 4.0, 0.7)
    top_p = st.sidebar.slider("Top-p (nucleus sampling)", 0.1, 1.0, 0.95)

    # Chat history persists across Streamlit reruns in session state
    if "history" not in st.session_state:
        st.session_state.history = []

    # Input field for the user message
    message = st.text_input("You:", "")

    # Submit button
    if st.button("Send") and message:
        # Drain the generator, keeping the last (fullest) partial response
        response = ""
        for response_text in respond(
            message,
            st.session_state.history,
            system_message,
            temperature,
            top_p,
            max_new_tokens,
        ):
            response = response_text

        # Update history with both sides of the exchange
        if response:
            st.session_state.history.append({"role": "user", "content": message})
            st.session_state.history.append({"role": "assistant", "content": response})

    # Render the conversation as readable "role: content" lines rather than
    # the raw repr of the history list
    conversation = "\n".join(
        f'{m["role"]}: {m["content"]}' for m in st.session_state.history
    )
    st.text_area("Conversation", value=conversation, height=300)


# Run the app
if __name__ == "__main__":
    main()
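
# Usage sketch (assumptions not in the original: the file is saved as app.py,
# and the Hugging Face access token is exported under the environment variable
# name `token`, matching the os.getenv("token") call above):
#
#   export token=hf_...     # your Hugging Face access token
#   streamlit run app.py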