import streamlit as st
from functools import lru_cache
import requests

# Cache the constructed API URLs (the call is cheap; the cache simply avoids
# rebuilding the same string on every rerun of the script)
@lru_cache(maxsize=3)
def load_hf_model(model_name):
    # Query the Hugging Face serverless Inference API directly
    api_url = f"https://api-inference.huggingface.co/models/deepseek-ai/{model_name}"
    return api_url

# Resolve the API URL for each supported model at startup
MODELS = {
    "DeepSeek-R1-Distill-Qwen-32B": load_hf_model("DeepSeek-R1-Distill-Qwen-32B"),
    "DeepSeek-R1": load_hf_model("DeepSeek-R1"),
    "DeepSeek-R1-Zero": load_hf_model("DeepSeek-R1-Zero"),
}

# --- Chatbot function ---
def chatbot(input_text, history, model_choice, system_message, max_new_tokens, temperature, top_p):
    history = history or []

    # Get the selected model's API URL
    api_url = MODELS[model_choice]

    # The text-generation task of the Inference API expects a plain string
    # under "inputs" and generation settings under "parameters". The system
    # message is folded into the prompt with a simple template.
    prompt = f"{system_message}\n\nUser: {input_text}\nAssistant:"
    payload = {
        "inputs": prompt,
        "parameters": {
            "max_new_tokens": max_new_tokens,
            "temperature": temperature,
            "top_p": top_p,
            "return_full_text": False,
        },
    }

    # Run inference against the selected model
    try:
        headers = {"Authorization": f"Bearer {st.secrets['HUGGINGFACE_TOKEN']}"}
        response = requests.post(api_url, headers=headers, json=payload, timeout=120)
        response.raise_for_status()
        result = response.json()

        # Handle the possible response shapes
        if isinstance(result, list) and result:
            # Text-generation responses are a list of generated sequences
            assistant_response = result[0].get("generated_text", "No response generated.")
        elif isinstance(result, dict) and "generated_text" in result:
            assistant_response = result["generated_text"]
        elif isinstance(result, dict) and "error" in result:
            # The API returns {"error": ...}, e.g. while the model is still loading
            assistant_response = f"API error: {result['error']}"
        else:
            assistant_response = "Unexpected model response format."
    except Exception as e:
        assistant_response = f"Error: {e}"

    # Append the user/assistant exchange to the history
    history.append((input_text, assistant_response))
    return history

# --- Streamlit App ---
st.set_page_config(page_title="DeepSeek Chatbot", page_icon="🤖", layout="wide")

# Title and description
st.title("DeepSeek Chatbot")
st.markdown("""
Created by [ruslanmv.com](https://ruslanmv.com/)

This is a demo of different DeepSeek models. Select a model, type your message, and press Enter.
You can also adjust optional parameters like the system message, max new tokens, temperature, and top-p.
""")

# Sidebar for model selection and parameters
with st.sidebar:
    st.header("Options")
    model_choice = st.radio(
        "Choose a Model",
        options=list(MODELS.keys()),
        index=0
    )

    st.header("Optional Parameters")
    system_message = st.text_area(
        "System Message",
        value="You are a friendly Chatbot created by ruslanmv.com",
        height=100
    )
    max_new_tokens = st.slider(
        "Max New Tokens",
        min_value=1, max_value=4000, value=200
    )
    temperature = st.slider(
        "Temperature",
        min_value=0.10, max_value=4.00, value=0.70
    )
    top_p = st.slider(
        "Top-p (nucleus sampling)",
        min_value=0.10, max_value=1.00, value=0.90
    )

# Initialize chat history
if "chat_history" not in st.session_state:
    st.session_state.chat_history = []

# Display chat history
for user_msg, assistant_msg in st.session_state.chat_history:
    with st.chat_message("user"):
        st.write(user_msg)
    with st.chat_message("assistant"):
        st.write(assistant_msg)

# Input box for user message
user_input = st.chat_input("Type your message here...")

# Handle user input
if user_input:
    # Run inference and append the new exchange to the chat history
    st.session_state.chat_history = chatbot(
        user_input,
        st.session_state.chat_history,
        model_choice,
        system_message,
        max_new_tokens,
        temperature,
        top_p
    )
    # Rerun to update the chat display
    st.rerun()
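
# --- Usage note ---
# A minimal sketch of how to run this app, assuming the script is saved as
# app.py (the filename is an assumption) and the Hugging Face token is stored
# in .streamlit/secrets.toml, which is where st.secrets reads it from:
#   HUGGINGFACE_TOKEN = "hf_..."
# Then start the app with:
#   streamlit run app.py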