Spaces: Runtime error
Commit bb116be
Parent(s): d71fbcc
Added files for midterm
Files changed:
- Chatbot.py +73 -0
- Dockerfile +25 -10
- Documents/HR Policy Manual.docx +0 -0
- Documents/HR Policy Manual.pdf +0 -0
- Documents/IT Department Policy Manual.docx +0 -0
- Documents/IT Department Policy Manual.pdf +0 -0
- Documents/Tranportation Policy Manual.docx +0 -0
- Documents/Tranportation Policy Manual.pdf +0 -0
- app.py +0 -7
- pages/Load_Documents.py +45 -0
- pages/Pending_tickets.py +29 -0
- pages/__pycache__/_admin_util.cpython-311.pyc +0 -0
- pyproject.toml +21 -0
- requirements.txt +17 -2
- utils/__pycache__/_admin_util.cpython-311.pyc +0 -0
- utils/_admin_util.py +140 -0
- uv.lock +0 -0
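
Note: the two __pycache__/*.pyc entries are compiled Python bytecode caches; they are build artifacts and are normally kept out of version control. A typical .gitignore entry (not part of this commit) would be:

__pycache__/
*.pyc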
Chatbot.py
ADDED
@@ -0,0 +1,73 @@
+from dotenv import load_dotenv
+import streamlit as st
+from utils._admin_util import invoke_rag, get_ticket_category
+import os
+
+# Initialize categories in session state
+if "categories" not in st.session_state:
+    st.session_state.categories = {
+        "HR Support": [],
+        "IT Support": [],
+        "Transportation Support": [],
+        "Other": []
+    }
+
+def main():
+    load_dotenv()
+
+    # Page configuration
+    st.set_page_config(
+        page_title="Intelligent Customer Support Agent",
+        page_icon="🤖",
+        layout="wide"
+    )
+
+    # Sidebar for API key
+    with st.sidebar:
+        openai_api_key = st.text_input("OpenAI API Key", type="password")
+        st.markdown("---")
+        st.markdown("""
+        ### About
+        This is an AI-powered customer support agent that can answer questions or raise a support ticket about company policies and procedures:
+        - HR policies
+        - IT policies
+        - Transportation policies
+        - Other policies
+        """)
+
+    # Set OpenAI API key
+    if openai_api_key:
+        os.environ["OPENAI_API_KEY"] = openai_api_key
+
+    # Main chat interface
+    st.title("🤖 Intelligent Customer Support Agent")
+    st.caption("Your 24/7 AI Customer Service Representative")
+
+
+    st.header("Automatic Ticket Classification Tool")
+    # Capture user input
+    st.write("We are here to help you, please ask your question:")
+    prompt = st.text_input("🔍")
+
+    if prompt:
+        if "vector_store" not in st.session_state:
+            st.error("Please load the document data first!")
+            st.stop()
+
+        response = invoke_rag(st.session_state.vector_store, prompt)
+        st.write(response)
+
+        # Button to create a ticket with the respective department
+        button = st.button("Submit ticket?")
+
+        if button:
+            category = get_ticket_category(prompt)
+            st.session_state.categories[category].append(prompt)
+            st.success("Ticket submitted successfully!")
+            # Display category (optional)
+            st.write(f"Category: {category}")
+
+
+if __name__ == '__main__':
+    main()
+
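
Note: since main() calls load_dotenv(), the OpenAI key can also come from a local .env file instead of the sidebar field. A minimal sketch, with a placeholder value:

# .env (read by load_dotenv at startup)
OPENAI_API_KEY=sk-...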
Dockerfile
CHANGED
@@ -1,16 +1,31 @@
-#
-
-
-FROM python:3.9
+# Use Python 3.11 which has better compatibility with most packages
+FROM python:3.11-slim-bookworm
 
+# Add user
 RUN useradd -m -u 1000 user
 USER user
-ENV PATH="/home/user/.local/bin:$PATH"
 
-
-
-
-
-CMD ["
+# Set environment variables
+ENV HOME=/home/user \
+    PATH=/home/user/.local/bin:$PATH \
+    PYTHONUNBUFFERED=1 \
+    PYTHONDONTWRITEBYTECODE=1
+
+# Set working directory
+WORKDIR $HOME/app
+
+# Copy requirements first to leverage Docker cache
+COPY --chown=user requirements.txt .
+
+# Install pip and dependencies
+RUN pip install --no-cache-dir --upgrade pip && \
+    pip install --no-cache-dir -r requirements.txt
+
+# Copy the rest of the application
+COPY --chown=user . .
 
+# Expose the port
+EXPOSE 7860
 
+# Run the app
+CMD ["streamlit", "run", "Chatbot.py", "--host", "0.0.0.0", "--port", "7860"]
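
Note: Streamlit's CLI does not accept --host or --port flags; the equivalents are --server.address and --server.port, which likely explains the Space's "Runtime error" status. A corrected final line would be:

CMD ["streamlit", "run", "Chatbot.py", "--server.address", "0.0.0.0", "--server.port", "7860"]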
Documents/HR Policy Manual.docx
ADDED
Binary file (16.7 kB)

Documents/HR Policy Manual.pdf
ADDED
Binary file (114 kB)

Documents/IT Department Policy Manual.docx
ADDED
Binary file (17.7 kB)

Documents/IT Department Policy Manual.pdf
ADDED
Binary file (122 kB)

Documents/Tranportation Policy Manual.docx
ADDED
Binary file (16.5 kB)

Documents/Tranportation Policy Manual.pdf
ADDED
Binary file (108 kB)
app.py
DELETED
@@ -1,7 +0,0 @@
-from fastapi import FastAPI
-
-app = FastAPI()
-
-@app.get("/")
-def greet_json():
-    return {"Hello": "World!"}
pages/Load_Documents.py
ADDED
@@ -0,0 +1,45 @@
+from utils._admin_util import create_embeddings, create_vector_store, read_pdf_data, split_data
+import streamlit as st
+from dotenv import load_dotenv
+
+def main():
+    load_dotenv()
+
+    st.set_page_config(page_title="Dump PDFs to Qdrant - Vector Store")
+    st.title("Please upload your files...📁 ")
+
+    # Upload multiple PDF files
+    uploaded_files = st.file_uploader("Upload PDF files", type=["pdf"], accept_multiple_files=True)
+
+    if uploaded_files:
+        with st.spinner('Processing PDF files...'):
+            all_chunks = []
+
+            # Process each PDF file
+            for pdf in uploaded_files:
+                st.write(f"Processing: {pdf.name}")
+
+                # Extract text from PDF
+                text = read_pdf_data(pdf)
+                st.write(f"👉 Reading {pdf.name} done")
+
+                # Create chunks for this PDF
+                chunks = split_data(text)
+                all_chunks.extend(chunks)
+                st.write(f"👉 Splitting {pdf.name} into chunks done")
+
+            # Create embeddings once for all chunks
+            st.write("Creating embeddings...")
+            embeddings = create_embeddings()
+            st.write("👉 Creating embeddings instance done")
+
+            # Create vector store with all chunks
+            vector_store = create_vector_store(embeddings, all_chunks)
+            st.session_state.vector_store = vector_store
+
+        st.success(f"Successfully processed {len(uploaded_files)} files and pushed embeddings to Qdrant")
+        st.write(f"Total chunks created: {len(all_chunks)}")
+
+
+if __name__ == '__main__':
+    main()
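
Note: the vector store is kept only in st.session_state, and create_vector_store (see utils/_admin_util.py below) uses an in-memory Qdrant instance, so despite the "pushed embeddings to Qdrant" message each new browser session must re-upload its PDFs.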
pages/Pending_tickets.py
ADDED
@@ -0,0 +1,29 @@
+import streamlit as st
+
+st.title('Departments')
+
+# Create tabs
+tab_titles = ['HR Support', 'IT Support', 'Transportation Support', 'Other']
+tabs = st.tabs(tab_titles)
+
+# Add content to each tab
+with tabs[0]:
+    st.header('HR Support tickets')
+    for ticket in st.session_state.categories["HR Support"]:
+        st.write(str(st.session_state.categories["HR Support"].index(ticket) + 1) + " : " + ticket)
+
+with tabs[1]:
+    st.header('IT Support tickets')
+    for ticket in st.session_state.categories['IT Support']:
+        st.write(str(st.session_state.categories['IT Support'].index(ticket) + 1) + " : " + ticket)
+
+with tabs[2]:
+    st.header('Transportation Support tickets')
+    for ticket in st.session_state.categories['Transportation Support']:
+        st.write(str(st.session_state.categories['Transportation Support'].index(ticket) + 1) + " : " + ticket)
+
+with tabs[3]:
+    st.header('Other tickets')
+    for ticket in st.session_state.categories['Other']:
+        st.write(str(st.session_state.categories['Other'].index(ticket) + 1) + " : " + ticket)
+
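
Note: list.index(ticket) returns the position of the first match, so duplicate tickets would all display the same number, and the page assumes Chatbot.py has already initialized st.session_state.categories. A sketch of the more idiomatic loop for one tab:

# enumerate numbers each ticket by position, avoiding the duplicate pitfall of list.index()
for i, ticket in enumerate(st.session_state.categories['HR Support'], start=1):
    st.write(f"{i} : {ticket}")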
pages/__pycache__/_admin_util.cpython-311.pyc
ADDED
Binary file (5.67 kB)
pyproject.toml
ADDED
@@ -0,0 +1,21 @@
+[project]
+name = "midterm"
+version = "0.1.0"
+description = "intelligent customer support chat"
+readme = "README.md"
+requires-python = ">=3.13"
+dependencies = [
+    "langchain-core==0.3.31",
+    "langchain==0.3.15",
+    "langchain-community==0.3.15",
+    "langchain-openai==0.3.1",
+    "langchain-qdrant==0.2.0",
+    "qdrant-client==1.13.2",
+    "tiktoken>=0.8.0",
+    "pymupdf==1.25.2",
+    "langgraph>=0.2.67",
+    "langsmith>=0.3.1",
+    "openai>=1.60.0",
+    "cohere>=5.13.11",
+    "lxml>=5.3.0",
+]
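
Note: requires-python = ">=3.13" conflicts with the Dockerfile's python:3.11-slim-bookworm base image, and these LangChain 0.3.x pins diverge from the 0.1.x line in requirements.txt; only one of the two dependency sets can describe the environment the Space actually runs.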
requirements.txt
CHANGED
@@ -1,2 +1,17 @@
-
-
+langchain==0.1.13
+streamlit==1.29.0
+openai==1.14.2
+tiktoken>=0.6.0
+python-dotenv==1.0.1
+pinecone-client==3.2.0
+pypdf==4.0.1
+joblib==1.3.2
+pandas==2.2.0
+scikit-learn==1.4.0
+sentence-transformers==2.5.1
+langchain-openai==0.1.0
+PyPDF2
+pymupdf
+langchain-core>=0.1.0
+qdrant-client>=1.7.0
+langchain-qdrant>=0.1.0
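
Note: the Dockerfile installs from requirements.txt only, so these pins (langchain==0.1.13, langchain-openai==0.1.0) are what the container gets, not the newer versions in pyproject.toml. pinecone-client, pandas, scikit-learn, and sentence-transformers are not used by the committed code, and PyPDF2 and pymupdf are left unpinned.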
utils/__pycache__/_admin_util.cpython-311.pyc
ADDED
Binary file (5.45 kB)
utils/_admin_util.py
ADDED
@@ -0,0 +1,140 @@
+import tiktoken
+import PyPDF2
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain_openai.embeddings import OpenAIEmbeddings
+from langchain_qdrant import QdrantVectorStore
+from qdrant_client import QdrantClient
+from qdrant_client.http.models import Distance, VectorParams
+from langchain_core.runnables import RunnablePassthrough
+from langchain_core.output_parsers import StrOutputParser
+from langchain_openai import ChatOpenAI
+from langchain_core.prompts import ChatPromptTemplate
+
+
+HUMAN_TEMPLATE = """
+#CONTEXT:
+{context}
+
+QUERY:
+{query}
+
+Use the provided context to answer the provided user query. Only use the provided context to answer the query. If you do not know the answer, or it is not contained in the provided context, respond with "I don't know".
+"""
+
+# Read PDF data
+def read_pdf_data(pdf_file):
+    # Create PDF reader object
+    pdf_reader = PyPDF2.PdfReader(pdf_file)
+
+    # Extract text from each page
+    text = ""
+    for page in pdf_reader.pages:
+        text += page.extract_text()
+    return text
+
+def tiktoken_len(text):
+    tokens = tiktoken.encoding_for_model("gpt-4").encode(text)
+    return len(tokens)
+
+# Split data into chunks
+def split_data(text):
+    text_splitter = RecursiveCharacterTextSplitter(
+        chunk_size=100,
+        chunk_overlap=0,
+        length_function=tiktoken_len,
+    )
+    chunks = text_splitter.split_text(text)
+    return chunks
+
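
Note: with length_function=tiktoken_len, chunk_size=100 means 100 tokens per chunk with zero overlap, which is small enough to cut sentences apart. A sketch of a more typical configuration (the values are illustrative assumptions, not from this commit):

# larger chunks with some overlap preserve more local context per retrieval hit
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=50,
    length_function=tiktoken_len,
)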
+# Create embeddings instance
+def create_embeddings():
+    embedding_model = OpenAIEmbeddings(model="text-embedding-3-small")
+    return embedding_model
+
+
+# Create a vector database using Qdrant
+def create_vector_store(embedding_model, chunks):
+    embedding_dim = 1536  # text-embedding-3-small produces 1536-dimensional vectors
+    client = QdrantClient(":memory:")
+    client.create_collection(
+        collection_name="lcel_doc_v2",
+        vectors_config=VectorParams(size=embedding_dim, distance=Distance.COSINE),
+    )
+    vector_store = QdrantVectorStore(
+        client=client,
+        collection_name="lcel_doc_v2",
+        embedding=embedding_model,
+    )
+    _ = vector_store.add_texts(texts=chunks)
+    return vector_store
+
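
Note: QdrantClient(":memory:") creates a throwaway in-process store, so every call to create_vector_store rebuilds the collection from scratch. Pointing the client at a running Qdrant server would persist it; a sketch, assuming a (hypothetical) local server:

# connect to an external Qdrant instance instead of the in-process store
client = QdrantClient(url="http://localhost:6333")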
+# Create RAG chain
+def create_rag(vector_store):
+    # Initialize OpenAI chat model with a valid model name
+    openai_chat_model = ChatOpenAI(model="gpt-3.5-turbo")
+
+    # Create chat prompt template
+    chat_prompt = ChatPromptTemplate.from_messages([
+        ("system", "You are a helpful assistant that answers questions based on the provided context."),
+        ("human", HUMAN_TEMPLATE)
+    ])
+
+    # Set up retriever with configurable k
+    retriever = vector_store.as_retriever(search_kwargs={"k": 3})
+
+    # Create RAG pipeline
+    simple_rag = (
+        {"context": retriever, "query": RunnablePassthrough()}
+        | chat_prompt
+        | openai_chat_model
+        | StrOutputParser()
+    )
+
+    return simple_rag
+
+# Invoke RAG
+def invoke_rag(vector_store, query):
+    rag_chain = create_rag(vector_store)
+    response = rag_chain.invoke(query)
+    return response
+
+
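
Note: in the {"context": retriever, ...} step the retriever's list of Document objects is stringified into the prompt, metadata and all. A common refinement (not in this commit) is to join just the page contents:

# format retrieved Documents into a plain string before templating
def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

# then: {"context": retriever | format_docs, "query": RunnablePassthrough()}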
+def get_ticket_category(query):
+    # Define the system prompt for categorization
+    CATEGORY_PROMPT = """You are a ticket categorization system. Categorize the following query into exactly one of these categories:
+    - HR Support: For queries about employment, benefits, leaves, workplace policies, etc.
+    - IT Support: For queries about software, hardware, network, system access, etc.
+    - Transportation Support: For queries about company transport, parking, vehicle maintenance, etc.
+    - Other: For queries that do not fit into the above categories.
+    Respond with ONLY the category name, nothing else.
+
+    Query: {query}
+    """
+
+    # Create OpenAI client for categorization
+    client = ChatOpenAI(model="gpt-3.5-turbo")
+
+    # Create the prompt template
+    prompt = ChatPromptTemplate.from_messages([
+        ("system", CATEGORY_PROMPT)
+    ])
+
+    # Create the chain
+    chain = prompt | client | StrOutputParser()
+
+    # Get the category
+    category = chain.invoke({"query": query})
+
+    # Clean and validate the response
+    category = category.strip()
+    valid_categories = ["HR Support", "IT Support", "Transportation Support"]
+
+    if category not in valid_categories:
+        return "Other"  # Default category if classification fails
+
+    return category
+
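
Note: the model must return a category string verbatim for the validation in get_ticket_category to pass; setting temperature=0 makes that more deterministic (a standard ChatOpenAI parameter, not set in this commit):

# greedy decoding keeps the classifier's output format stable
client = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)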
uv.lock
ADDED
The diff for this file is too large to render.