Spaces:

melissalau
/

streamlit-ollama-chatbot

Sleeping

File size: 5,722 Bytes

import streamlit as st
from langchain.llms import CTransformers
from langchain.memory import ConversationBufferMemory
from langchain.memory.chat_message_histories import ChatMessageHistory
from langchain.prompts import PromptTemplate
from langchain_core.runnables import RunnableSequence
from huggingface_hub import hf_hub_download
import os

# --- Model Definitions ---
MODEL_MAP = {
    "TinyLlama (1.1B)": {
        "repo_id": "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF",
        "filename": "tinyllama-1.1b-chat-v1.0.Q4_0.gguf",
        "type": "llama"
    },
    "Deepseek-Coder (1.3B)": {
        "repo_id": "TheBloke/deepseek-coder-1.3b-base-GGUF",
        "filename": "deepseek-coder-1.3b-base.Q4_K_M.gguf",
        "type": "llama"
    }
}

# --- Model Loading ---
@st.cache_resource
def download_model_from_hub(repo_id, filename):
    st.write(f"Downloading model '{filename}' from Hugging Face Hub...")
    model_path = hf_hub_download(repo_id=repo_id, filename=filename)
    return model_path

@st.cache_resource
def load_llm(model_name):
    model_info = MODEL_MAP[model_name]
    model_path = download_model_from_hub(model_info["repo_id"], model_info["filename"])
    llm = CTransformers(
        model=model_path,
        model_type=model_info["type"],
        config={'max_new_tokens': 2048, 'temperature': 0.7}
    )
    return llm

# ----------------- Streamlit UI and Logic -----------------
st.set_page_config(layout="wide")
st.title("My Local Chatbot")

st.sidebar.header("Settings")
selected_model_name = st.sidebar.selectbox("Choose a Model", list(MODEL_MAP.keys()))

llm = load_llm(selected_model_name)
st.success(f"Model '{selected_model_name}' loaded successfully!")

MAX_HISTORY = st.sidebar.number_input("Max History", min_value=1, max_value=10, value=2, step=1)
CONTEXT_SIZE = st.sidebar.number_input("Context Size", min_value=1024, max_value=16384, value=8192, step=1024)
TEMPERATURE = st.sidebar.slider("Temperature", 0.0, 1.5, 0.7, 0.1)
TOP_P = st.sidebar.slider("Top-p (nucleus sampling)", 0.0, 1.0, 0.9, 0.05)
TOP_K = st.sidebar.slider("Top-k", 0, 100, 40, 5)
MAX_TOKENS = st.sidebar.number_input("Max Tokens", min_value=256, max_value=16384, value=2048, step=256)

if "chat_history" not in st.session_state:
    st.session_state.chat_history = []
if "memory" not in st.session_state:
    st.session_state.memory = ConversationBufferMemory(return_messages=True)
if "summary" not in st.session_state:
    st.session_state.summary = ""

def clear_memory():
    chat_history = ChatMessageHistory()
    st.session_state.memory = ConversationBufferMemory(chat_memory=chat_history)
    st.session_state.chat_history = []
    st.session_state.summary = ""

if st.sidebar.button("Clear Conversation History"):
    clear_memory()

# --- CORRECTED: Summary chain is now defined globally ---
summary_prompt_template = PromptTemplate(
    input_variables=["chat_history"],
    template="You are a summarizer. Summarize the following conversation to preserve key information and context. \n\n{chat_history}"
)

summary_chain = summary_prompt_template | llm

def get_summary(chat_history_str):
    return summary_chain.invoke({"chat_history": chat_history_str})

def summarize_chat():
    if not st.session_state.chat_history:
        return "No chat history to summarize."
    return get_summary(st.session_state.chat_history)

if st.sidebar.button("Summarize Chat"):
    with st.sidebar:
        st.markdown("**Chat Summary:**")
        summary = summarize_chat()
        st.success(summary)

# --- Model-specific prompt templates ---
if "Llama" in selected_model_name:
    template = """[INST]
    You are a helpful assistant.
    Current conversation summary:
    {summary}
    Conversation history:
    {history}
    User: {human_input}
    [/INST]
    Assistant:"""
elif "Deepseek" in selected_model_name:
    template = """<|im_start|>system
    You are a helpful assistant.
    Current conversation summary:
    {summary}<|im_end|>
    <|im_start|>user
    {history}
    {human_input}<|im_end|>
    <|im_start|>assistant
    """

prompt_template = PromptTemplate(
    input_variables=["summary", "history", "human_input"],
    template=template
)

chain = prompt_template | llm

for msg in st.session_state.chat_history:
    with st.chat_message(msg["role"]):
        st.markdown(msg["content"])

def trim_memory():
    if len(st.session_state.chat_history) > MAX_HISTORY * 2:
        history_to_summarize = st.session_state.chat_history[:(len(st.session_state.chat_history) - MAX_HISTORY * 2)]
        history_str = ""
        for msg in history_to_summarize:
            history_str += f"{msg['role']}: {msg['content']}\n"
        new_summary = summary_chain.invoke({"chat_history": history_str})
        st.session_state.summary += "\n" + new_summary
        st.session_state.chat_history = st.session_state.chat_history[(len(st.session_state.chat_history) - MAX_HISTORY * 2):]

if prompt := st.chat_input("Say something"):
    with st.chat_message("user"):
        st.markdown(prompt)
    st.session_state.chat_history.append({"role": "user", "content": prompt})
    trim_memory()
    formatted_history = ""
    for msg in st.session_state.chat_history:
        formatted_history += f"{msg['role']}: {msg['content']}\n"
    with st.chat_message("assistant"):
        response_container = st.empty()
        full_response = ""
        for chunk in chain.stream({
                "human_input": prompt,
                "history": formatted_history,
                "summary": st.session_state.summary
            }):
            full_response += chunk
            response_container.markdown(full_response)
    st.session_state.chat_history.append({"role": "assistant", "content": full_response})