Spaces:

Krish30
/

House.Ai

Sleeping

App Files Files Community

Krish30 committed on Jan 5

Commit

64eac63

verified ·

1 Parent(s): bc67a41

Upload 5 files

Browse files

Files changed (5) hide show

app.py +137 -0
chat_history.db +0 -0
config.json +1 -0
requirements.txt +9 -0
vectorize_documents.py +56 -0

app.py ADDED Viewed

	@@ -0,0 +1,137 @@

+import os
+import json
+import sqlite3
+from datetime import datetime
+import streamlit as st
+from langchain_huggingface import HuggingFaceEmbeddings
+from langchain_chroma import Chroma
+from langchain_groq import ChatGroq
+from langchain.memory import ConversationBufferMemory
+from langchain.chains import ConversationalRetrievalChain
+from vectorize_documents import embeddings
+working_dir = os.path.dirname(os.path.abspath(__file__))
+config_data = json.load(open(f"{working_dir}/config.json"))
+GROQ_API_KEY = config_data["GROQ_API_KEY"]
+os.environ["GROQ_API_KEY"]= GROQ_API_KEY
+# Set up the database with check_same_thread=False
+def setup_db():
+    conn = sqlite3.connect("chat_history.db", check_same_thread=False)  # Ensure thread-safe connection
+    cursor = conn.cursor()
+    cursor.execute("""
+        CREATE TABLE IF NOT EXISTS chat_histories (
+            id INTEGER PRIMARY KEY AUTOINCREMENT,
+            username TEXT,
+            timestamp TEXT,
+            day TEXT,
+            user_message TEXT,
+            assistant_response TEXT
+        )
+    """)
+    conn.commit()
+    return conn  # Return the connection
+# Function to save chat history to SQLite
+def save_chat_history(conn, username, timestamp, day, user_message, assistant_response):
+    cursor = conn.cursor()
+    cursor.execute("""
+        INSERT INTO chat_histories (username, timestamp, day, user_message, assistant_response)
+        VALUES (?, ?, ?, ?, ?)
+    """, (username, timestamp, day, user_message, assistant_response))
+    conn.commit()
+# Function to set up vectorstore for embeddings
+def setup_vectorstore():
+    embeddings = HuggingFaceEmbeddings()
+    vectorstore = Chroma(persist_directory="House_vectordb", embedding_function=embeddings)
+    return vectorstore
+# Function to set up the chatbot chain
+def chat_chain(vectorstore):
+    llm = ChatGroq(model="llama-3.1-70b-versatile", temperature=0)
+    retriever = vectorstore.as_retriever()
+    memory = ConversationBufferMemory(
+        llm=llm,
+        output_key="answer",
+        memory_key="chat_history",
+        return_messages=True
+    )
+    chain = ConversationalRetrievalChain.from_llm(
+        llm=llm,
+        retriever=retriever,
+        chain_type="stuff",
+        memory=memory,
+        verbose=True,
+        return_source_documents=True
+    )
+    return chain
+# Streamlit UI setup
+st.set_page_config(page_title="House.Ai", page_icon="🤖AI", layout="centered")
+st.title("🤖 House.Ai")
+st.subheader("You can ask your general questions and queries to our AI")
+# Step 1: Initialize the connection and check if the user is already logged in
+if "conn" not in st.session_state:
+    st.session_state.conn = setup_db()
+if "username" not in st.session_state:
+    username = st.text_input("Enter your name to proceed:")
+    if username:
+        with st.spinner("Loading chatbot interface... Please wait."):
+            st.session_state.username = username
+            st.session_state.chat_history = []  # Initialize empty chat history in memory
+            st.session_state.vectorstore = setup_vectorstore()
+            st.session_state.conversational_chain = chat_chain(st.session_state.vectorstore)
+            st.success(f"Welcome, {username}! The chatbot interface is ready.")
+else:
+    username = st.session_state.username
+# Step 2: Initialize components if not already set
+if "conversational_chain" not in st.session_state:
+    st.session_state.vectorstore = setup_vectorstore()
+    st.session_state.conversational_chain = chat_chain(st.session_state.vectorstore)
+# Step 3: Display the chat history in the UI
+if "username" in st.session_state:
+    st.subheader(f"Hello {username}, start your query below!")
+    # Display chat history (messages exchanged between user and assistant)
+    if st.session_state.chat_history:
+        for message in st.session_state.chat_history:
+            if message['role'] == 'user':
+                with st.chat_message("user"):
+                    st.markdown(message["content"])
+            elif message['role'] == 'assistant':
+                with st.chat_message("assistant"):
+                    st.markdown(message["content"])
+    # Input field for the user to type their message
+    user_input = st.chat_input("Ask AI....")
+    if user_input:
+        with st.spinner("Processing your query... Please wait."):
+            # Save user input to chat history in memory
+            st.session_state.chat_history.append({"role": "user", "content": user_input})
+            # Display user's message in chatbot (for UI display)
+            with st.chat_message("user"):
+                st.markdown(user_input)
+            # Get assistant's response from the chain
+            with st.chat_message("assistant"):
+                response = st.session_state.conversational_chain({"question": user_input})
+                assistant_response = response["answer"]
+                st.markdown(assistant_response)
+                # Save assistant's response to chat history in memory
+                st.session_state.chat_history.append({"role": "assistant", "content": assistant_response})
+                # Save the chat history to the database (SQLite)
+                timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+                day = datetime.now().strftime("%A")  # Get the day of the week (e.g., Monday)
+                save_chat_history(st.session_state.conn, username, timestamp, day, user_input, assistant_response)

chat_history.db ADDED Viewed

Binary file (32.8 kB). View file

config.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"GROQ_API_KEY": "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"}

requirements.txt ADDED Viewed

	@@ -0,0 +1,9 @@

+streamlit==1.38.0
+langchain-community==0.2.16
+langchain-text-splitters==0.2.4
+langchain-chroma==0.1.3
+langchain-huggingface==0.0.3
+langchain-groq==0.1.9
+unstructured==0.15.0
+nltk==3.8.1
+deep-translator

vectorize_documents.py ADDED Viewed

	@@ -0,0 +1,56 @@

+from langchain_text_splitters import CharacterTextSplitter
+from langchain_huggingface import HuggingFaceEmbeddings
+from langchain_chroma import Chroma
+from langchain.docstore.document import Document
+import pandas as pd
+import os
+import glob
+# Define a function to perform vectorization for multiple CSV files
+def vectorize_documents():
+    embeddings = HuggingFaceEmbeddings()
+    # Directory containing multiple CSV files
+    csv_directory = "Data"  # Replace with your folder name
+    csv_files = glob.glob(os.path.join(csv_directory, "*.csv"))  # Find all CSV files in the folder
+    documents = []
+    # Load and concatenate all CSV files
+    for file_path in csv_files:
+        df = pd.read_csv(file_path)
+        for _, row in df.iterrows():
+            # Combine all columns in the row into a single string
+            row_content = " ".join(row.astype(str))
+            documents.append(Document(page_content=row_content))
+    # Splitting the text and creating chunks of these documents
+    text_splitter = CharacterTextSplitter(
+        chunk_size=2000,
+        chunk_overlap=500
+    )
+    text_chunks = text_splitter.split_documents(documents)
+    # Process text chunks in batches
+    batch_size = 5000  # Chroma's batch size limit is 5461, set a slightly smaller size for safety
+    for i in range(0, len(text_chunks), batch_size):
+        batch = text_chunks[i:i + batch_size]
+        # Store the batch in Chroma vector DB
+        vectordb = Chroma.from_documents(
+            documents=batch,
+            embedding=embeddings,
+            persist_directory="House_vectordb"
+        )
+    print("Documents Vectorized and saved in VectorDB")
+# Module-level embeddings instance. This line sits outside the main guard, so it
+# runs every time the module is imported; app.py imports this name.
+embeddings = HuggingFaceEmbeddings()
+# Main guard: only run the vectorization when this file is executed as a script.
+if __name__ == "__main__":
+    vectorize_documents()