Krish30 committed (verified)
Commit 64eac63 · Parent(s): bc67a41

Upload 5 files

Files changed (5)
  1. app.py +137 -0
  2. chat_history.db +0 -0
  3. config.json +1 -0
  4. requirements.txt +9 -0
  5. vectorize_documents.py +56 -0
app.py ADDED
import os
import json
import sqlite3
from datetime import datetime

import streamlit as st
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_chroma import Chroma
from langchain_groq import ChatGroq
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain

from vectorize_documents import embeddings

working_dir = os.path.dirname(os.path.abspath(__file__))
config_data = json.load(open(f"{working_dir}/config.json"))
GROQ_API_KEY = config_data["GROQ_API_KEY"]
os.environ["GROQ_API_KEY"] = GROQ_API_KEY

# Set up the database with check_same_thread=False
def setup_db():
    conn = sqlite3.connect("chat_history.db", check_same_thread=False)  # Ensure thread-safe connection
    cursor = conn.cursor()
    cursor.execute("""
        CREATE TABLE IF NOT EXISTS chat_histories (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            username TEXT,
            timestamp TEXT,
            day TEXT,
            user_message TEXT,
            assistant_response TEXT
        )
    """)
    conn.commit()
    return conn  # Return the open connection

# Function to save chat history to SQLite
def save_chat_history(conn, username, timestamp, day, user_message, assistant_response):
    cursor = conn.cursor()
    cursor.execute("""
        INSERT INTO chat_histories (username, timestamp, day, user_message, assistant_response)
        VALUES (?, ?, ?, ?, ?)
    """, (username, timestamp, day, user_message, assistant_response))
    conn.commit()

# Function to set up the vector store for embeddings
def setup_vectorstore():
    embeddings = HuggingFaceEmbeddings()
    vectorstore = Chroma(persist_directory="House_vectordb", embedding_function=embeddings)
    return vectorstore

# Function to set up the chatbot chain
def chat_chain(vectorstore):
    llm = ChatGroq(model="llama-3.1-70b-versatile", temperature=0)
    retriever = vectorstore.as_retriever()
    memory = ConversationBufferMemory(
        llm=llm,
        output_key="answer",
        memory_key="chat_history",
        return_messages=True
    )
    chain = ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=retriever,
        chain_type="stuff",
        memory=memory,
        verbose=True,
        return_source_documents=True
    )
    return chain

# Streamlit UI setup
st.set_page_config(page_title="House.Ai", page_icon="🤖AI", layout="centered")

st.title("🤖 House.Ai")
st.subheader("Ask our AI your general questions and queries")

# Step 1: Initialize the connection and check whether the user is already logged in
if "conn" not in st.session_state:
    st.session_state.conn = setup_db()

if "username" not in st.session_state:
    username = st.text_input("Enter your name to proceed:")
    if username:
        with st.spinner("Loading chatbot interface... Please wait."):
            st.session_state.username = username
            st.session_state.chat_history = []  # Initialize empty chat history in memory
            st.session_state.vectorstore = setup_vectorstore()
            st.session_state.conversational_chain = chat_chain(st.session_state.vectorstore)
            st.success(f"Welcome, {username}! The chatbot interface is ready.")
else:
    username = st.session_state.username

# Step 2: Initialize components if not already set
if "conversational_chain" not in st.session_state:
    st.session_state.vectorstore = setup_vectorstore()
    st.session_state.conversational_chain = chat_chain(st.session_state.vectorstore)

# Step 3: Display the chat history in the UI
if "username" in st.session_state:
    st.subheader(f"Hello {username}, start your query below!")

    # Display chat history (messages exchanged between user and assistant)
    if st.session_state.chat_history:
        for message in st.session_state.chat_history:
            if message["role"] == "user":
                with st.chat_message("user"):
                    st.markdown(message["content"])
            elif message["role"] == "assistant":
                with st.chat_message("assistant"):
                    st.markdown(message["content"])

    # Input field for the user to type their message
    user_input = st.chat_input("Ask AI...")

    if user_input:
        with st.spinner("Processing your query... Please wait."):
            # Save user input to chat history in memory
            st.session_state.chat_history.append({"role": "user", "content": user_input})

            # Display the user's message in the chat UI
            with st.chat_message("user"):
                st.markdown(user_input)

            # Get the assistant's response from the chain
            with st.chat_message("assistant"):
                response = st.session_state.conversational_chain({"question": user_input})
                assistant_response = response["answer"]
                st.markdown(assistant_response)

            # Save the assistant's response to chat history in memory
            st.session_state.chat_history.append({"role": "assistant", "content": assistant_response})

            # Save the exchange to the SQLite database
            timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            day = datetime.now().strftime("%A")  # Day of the week (e.g., Monday)
            save_chat_history(st.session_state.conn, username, timestamp, day, user_input, assistant_response)
chat_history.db ADDED
Binary file (32.8 kB)
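
The chat_histories table in this database is written by save_chat_history in app.py and can be inspected outside the app with Python's standard sqlite3 module. Below is a minimal sketch, not part of the commit, that prints the most recent exchanges for a given user; the helper name print_recent_history and the default username are illustrative assumptions.

import sqlite3

def print_recent_history(db_path="chat_history.db", username="Krish", limit=5):
    # Hypothetical helper: read back the last few exchanges saved by app.py
    conn = sqlite3.connect(db_path)
    cursor = conn.cursor()
    cursor.execute(
        """
        SELECT timestamp, day, user_message, assistant_response
        FROM chat_histories
        WHERE username = ?
        ORDER BY id DESC
        LIMIT ?
        """,
        (username, limit),
    )
    for timestamp, day, user_message, assistant_response in cursor.fetchall():
        print(f"[{day} {timestamp}] {username}: {user_message}")
        print(f"    assistant: {assistant_response}")
    conn.close()

if __name__ == "__main__":
    print_recent_history()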
 
config.json ADDED
{"GROQ_API_KEY": "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"}
requirements.txt ADDED
streamlit==1.38.0
langchain-community==0.2.16
langchain-text-splitters==0.2.4
langchain-chroma==0.1.3
langchain-huggingface==0.0.3
langchain-groq==0.1.9
unstructured==0.15.0
nltk==3.8.1
deep-translator
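
A typical setup, assuming a fresh virtual environment, is to install these pinned versions with pip install -r requirements.txt and then launch the app with streamlit run app.py; run vectorize_documents.py first so that the House_vectordb directory exists before the chatbot tries to load it.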
vectorize_documents.py ADDED
from langchain_text_splitters import CharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_chroma import Chroma
from langchain.docstore.document import Document
import pandas as pd
import os
import glob

# Define a function to perform vectorization for multiple CSV files
def vectorize_documents():
    embeddings = HuggingFaceEmbeddings()

    # Directory containing multiple CSV files
    csv_directory = "Data"  # Replace with your folder name
    csv_files = glob.glob(os.path.join(csv_directory, "*.csv"))  # Find all CSV files in the folder

    documents = []

    # Load and concatenate all CSV files
    for file_path in csv_files:
        df = pd.read_csv(file_path)
        for _, row in df.iterrows():
            # Combine all columns in the row into a single string
            row_content = " ".join(row.astype(str))
            documents.append(Document(page_content=row_content))

    # Split the text and create chunks of these documents
    text_splitter = CharacterTextSplitter(
        chunk_size=2000,
        chunk_overlap=500
    )

    text_chunks = text_splitter.split_documents(documents)

    # Process text chunks in batches
    batch_size = 5000  # Chroma's batch size limit is 5461; use a slightly smaller size for safety
    for i in range(0, len(text_chunks), batch_size):
        batch = text_chunks[i:i + batch_size]

        # Store the batch in the Chroma vector DB
        vectordb = Chroma.from_documents(
            documents=batch,
            embedding=embeddings,
            persist_directory="House_vectordb"
        )

    print("Documents Vectorized and saved in VectorDB")

# Expose embeddings if needed (imported by app.py)
embeddings = HuggingFaceEmbeddings()

# Main guard to prevent execution on import
if __name__ == "__main__":
    vectorize_documents()
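
Once vectorize_documents() has populated House_vectordb, the persisted store can be reopened and queried on its own, which is a quick way to sanity-check the ingestion before app.py loads it. The snippet below is a minimal sketch; the query string and k value are illustrative assumptions.

from langchain_huggingface import HuggingFaceEmbeddings
from langchain_chroma import Chroma

# Reopen the persisted vector store built by vectorize_documents()
embeddings = HuggingFaceEmbeddings()
vectordb = Chroma(persist_directory="House_vectordb", embedding_function=embeddings)

# Illustrative query: fetch the 3 most similar row-documents
results = vectordb.similarity_search("example query about a house listing", k=3)
for doc in results:
    print(doc.page_content[:200])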