deepali1021 committed
Commit bb116be · 1 Parent(s): d71fbcc

Added files for midterm

Chatbot.py ADDED
@@ -0,0 +1,73 @@
+ from dotenv import load_dotenv
+ import streamlit as st
+ from utils._admin_util import invoke_rag, get_ticket_category
+ import os
+
+ # Initialize ticket categories in session state
+ if "categories" not in st.session_state:
+     st.session_state.categories = {
+         "HR Support": [],
+         "IT Support": [],
+         "Transportation Support": [],
+         "Other": []
+     }
+
+ def main():
+     load_dotenv()
+
+     # Page configuration
+     st.set_page_config(
+         page_title="Intelligent Customer Support Agent",
+         page_icon="🤖",
+         layout="wide"
+     )
+
+     # Sidebar for the API key
+     with st.sidebar:
+         openai_api_key = st.text_input("OpenAI API Key", type="password")
+         st.markdown("---")
+         st.markdown("""
+         ### About
+         This is an AI-powered customer support agent that can answer questions or raise support tickets about company policies and procedures:
+         - HR policies
+         - IT policies
+         - Transportation policies
+         - Other policies
+         """)
+
+     # Set the OpenAI API key
+     if openai_api_key:
+         os.environ["OPENAI_API_KEY"] = openai_api_key
+
+     # Main chat interface
+     st.title("🤖 Intelligent Customer Support Agent")
+     st.caption("Your 24/7 AI Customer Service Representative")
+
+     st.header("Automatic Ticket Classification Tool")
+
+     # Capture user input
+     st.write("We are here to help you, please ask your question:")
+     prompt = st.text_input("🔍")
+
+     if prompt:
+         if "vector_store" not in st.session_state:
+             st.error("Please load the document data first!")
+             st.stop()
+
+         response = invoke_rag(st.session_state.vector_store, prompt)
+         st.write(response)
+
+         # Button to create a ticket with the respective department
+         button = st.button("Submit ticket?")
+
+         if button:
+             category = get_ticket_category(prompt)
+             st.session_state.categories[category].append(prompt)
+             st.success("Ticket submitted successfully!")
+             # Display the category
+             st.write(f"Category: {category}")
+
+
+ if __name__ == '__main__':
+     main()
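Note: writing the sidebar key into os.environ makes it global to the whole process, so on a shared Space every visitor would end up using the last key entered. A minimal sketch of the alternative, passing the key to the model explicitly (a hypothetical change, not part of this commit, assuming langchain-openai's api_key parameter):

    # Hypothetical: thread the user's key through instead of mutating os.environ
    from langchain_openai import ChatOpenAI

    llm = ChatOpenAI(model="gpt-3.5-turbo", api_key=openai_api_key)

Also note the page depends on st.session_state.vector_store, which is only populated by pages/Load_Documents.py, so documents must be uploaded there first in the same browser session.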
Dockerfile CHANGED
@@ -1,16 +1,31 @@
- # Read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
- # you will also find guides on how best to write your Dockerfile
-
- FROM python:3.9
+ # Use Python 3.11, which has better compatibility with most packages
+ FROM python:3.11-slim-bookworm

+ # Add a non-root user
  RUN useradd -m -u 1000 user
  USER user
- ENV PATH="/home/user/.local/bin:$PATH"
- WORKDIR /app

+ # Set environment variables
+ ENV HOME=/home/user \
+     PATH=/home/user/.local/bin:$PATH \
+     PYTHONUNBUFFERED=1 \
+     PYTHONDONTWRITEBYTECODE=1
+
+ # Set the working directory
+ WORKDIR $HOME/app
+
- COPY --chown=user ./requirements.txt requirements.txt
- RUN pip install --no-cache-dir --upgrade -r requirements.txt
+ # Copy requirements first to leverage the Docker layer cache
+ COPY --chown=user requirements.txt .
+
+ # Upgrade pip and install dependencies
+ RUN pip install --no-cache-dir --upgrade pip && \
+     pip install --no-cache-dir -r requirements.txt

- COPY --chown=user . /app
- CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
+ # Copy the rest of the application
+ COPY --chown=user . .
+
+ # Expose the app port
+ EXPOSE 7860
+
+ # Run the app (Streamlit takes --server.address and --server.port, not --host/--port)
+ CMD ["streamlit", "run", "Chatbot.py", "--server.address", "0.0.0.0", "--server.port", "7860"]
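For local verification, the image would presumably be built and run with docker build -t midterm . followed by docker run -p 7860:7860 midterm, entering the OpenAI key in the sidebar once the app is up (the image name here is illustrative, not part of this commit).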
Documents/HR Policy Manual.docx ADDED
Binary file (16.7 kB). View file
 
Documents/HR Policy Manual.pdf ADDED
Binary file (114 kB). View file
 
Documents/IT Department Policy Manual.docx ADDED
Binary file (17.7 kB). View file
 
Documents/IT Department Policy Manual.pdf ADDED
Binary file (122 kB). View file
 
Documents/Tranportation Policy Manual.docx ADDED
Binary file (16.5 kB). View file
 
Documents/Tranportation Policy Manual.pdf ADDED
Binary file (108 kB). View file
 
app.py DELETED
@@ -1,7 +0,0 @@
- from fastapi import FastAPI
-
- app = FastAPI()
-
- @app.get("/")
- def greet_json():
-     return {"Hello": "World!"}
pages/Load_Documents.py ADDED
@@ -0,0 +1,45 @@
+ from utils._admin_util import create_embeddings, create_vector_store, read_pdf_data, split_data
+ import streamlit as st
+ from dotenv import load_dotenv
+
+ def main():
+     load_dotenv()
+
+     st.set_page_config(page_title="Dump PDFs to Qdrant - Vector Store")
+     st.title("Please upload your files...📁")
+
+     # Upload multiple PDF files
+     uploaded_files = st.file_uploader("Upload PDF files", type=["pdf"], accept_multiple_files=True)
+
+     if uploaded_files:
+         with st.spinner('Processing PDF files...'):
+             all_chunks = []
+
+             # Process each PDF file
+             for pdf in uploaded_files:
+                 st.write(f"Processing: {pdf.name}")
+
+                 # Extract text from the PDF
+                 text = read_pdf_data(pdf)
+                 st.write(f"👉 Reading {pdf.name} done")
+
+                 # Create chunks for this PDF
+                 chunks = split_data(text)
+                 all_chunks.extend(chunks)
+                 st.write(f"👉 Splitting {pdf.name} into chunks done")
+
+             # Create the embeddings model once for all chunks
+             st.write("Creating embeddings...")
+             embeddings = create_embeddings()
+             st.write("👉 Creating embeddings instance done")
+
+             # Create the vector store with all chunks
+             vector_store = create_vector_store(embeddings, all_chunks)
+             st.session_state.vector_store = vector_store
+
+             st.success(f"Successfully processed {len(uploaded_files)} files and pushed embeddings to Qdrant")
+             st.write(f"Total chunks created: {len(all_chunks)}")
+
+
+ if __name__ == '__main__':
+     main()
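One caveat: create_vector_store (see utils/_admin_util.py below) uses an in-memory Qdrant client, so the embeddings vanish when the session ends and must be re-uploaded each time. If persistence were wanted, qdrant-client also offers a local on-disk mode; a minimal sketch, with the storage path being an illustrative choice:

    # Hypothetical on-disk variant of the client used in create_vector_store
    from qdrant_client import QdrantClient

    client = QdrantClient(path="./qdrant_data")  # persists collections across sessions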
pages/Pending_tickets.py ADDED
@@ -0,0 +1,29 @@
+ import streamlit as st
+
+ st.title('Departments')
+
+ tab_titles = ['HR Support', 'IT Support', 'Transportation Support', 'Other']
+
+ # Guard: categories are initialized on the Chatbot page; default to empty
+ # buckets if a user lands here first
+ if "categories" not in st.session_state:
+     st.session_state.categories = {title: [] for title in tab_titles}
+
+ # Create one tab per department
+ tabs = st.tabs(tab_titles)
+
+ # List the pending tickets in each tab; enumerate gives stable numbering
+ # even when the same ticket text appears twice (list.index would not)
+ for tab, title in zip(tabs, tab_titles):
+     with tab:
+         st.header(f'{title} tickets')
+         for i, ticket in enumerate(st.session_state.categories[title], start=1):
+             st.write(f"{i} : {ticket}")
pages/__pycache__/_admin_util.cpython-311.pyc ADDED
Binary file (5.67 kB). View file
 
pyproject.toml ADDED
@@ -0,0 +1,21 @@
+ [project]
+ name = "midterm"
+ version = "0.1.0"
+ description = "Intelligent customer support chat"
+ readme = "README.md"
+ requires-python = ">=3.11"
+ dependencies = [
+     "langchain-core==0.3.31",
+     "langchain==0.3.15",
+     "langchain-community==0.3.15",
+     "langchain-openai==0.3.1",
+     "langchain-qdrant==0.2.0",
+     "qdrant-client==1.13.2",
+     "tiktoken>=0.8.0",
+     "pymupdf==1.25.2",
+     "langgraph>=0.2.67",
+     "langsmith>=0.3.1",
+     "openai>=1.60.0",
+     "cohere>=5.13.11",
+     "lxml>=5.3.0",
+ ]
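Given the uv.lock committed below, this environment can presumably be reproduced with uv sync. Note that these langchain 0.3.x pins differ from the 0.1.x pins in requirements.txt, which is what the Dockerfile actually installs.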
requirements.txt CHANGED
@@ -1,2 +1,17 @@
- fastapi
- uvicorn[standard]
+ langchain==0.1.13
+ streamlit==1.29.0
+ openai==1.14.2
+ tiktoken>=0.6.0
+ python-dotenv==1.0.1
+ pinecone-client==3.2.0
+ pypdf==4.0.1
+ joblib==1.3.2
+ pandas==2.2.0
+ scikit-learn==1.4.0
+ sentence-transformers==2.5.1
+ langchain-openai==0.1.0
+ PyPDF2
+ pymupdf
+ langchain-core>=0.1.0
+ qdrant-client>=1.7.0
+ langchain-qdrant>=0.1.0
utils/__pycache__/_admin_util.cpython-311.pyc ADDED
Binary file (5.45 kB). View file
 
utils/_admin_util.py ADDED
@@ -0,0 +1,140 @@
+ import tiktoken
+ import PyPDF2
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
+ from langchain_openai.embeddings import OpenAIEmbeddings
+ from langchain_qdrant import QdrantVectorStore
+ from qdrant_client import QdrantClient
+ from qdrant_client.http.models import Distance, VectorParams
+ from langchain_core.runnables import RunnablePassthrough
+ from langchain_core.output_parsers import StrOutputParser
+ from langchain_openai import ChatOpenAI
+ from langchain_core.prompts import ChatPromptTemplate
+
+
+ HUMAN_TEMPLATE = """
+ #CONTEXT:
+ {context}
+
+ QUERY:
+ {query}
+
+ Use the provided context to answer the provided user query. Only use the provided context to answer the query. If you do not know the answer, or it is not contained in the provided context, respond with "I don't know".
+ """
+
+ # Read PDF data
+ def read_pdf_data(pdf_file):
+     # Create a PDF reader object
+     pdf_reader = PyPDF2.PdfReader(pdf_file)
+
+     # Extract text from each page
+     text = ""
+     for page in pdf_reader.pages:
+         text += page.extract_text()
+     return text
+
+ # Token-based length function for the splitter
+ def tiktoken_len(text):
+     tokens = tiktoken.encoding_for_model("gpt-4").encode(text)
+     return len(tokens)
+
+ # Split data into chunks
+ def split_data(text):
+     text_splitter = RecursiveCharacterTextSplitter(
+         chunk_size=100,
+         chunk_overlap=0,
+         length_function=tiktoken_len,
+     )
+     chunks = text_splitter.split_text(text)
+     return chunks
+
+ # Create an embeddings instance
+ def create_embeddings():
+     embedding_model = OpenAIEmbeddings(model="text-embedding-3-small")
+     return embedding_model
+
+
+ # Create a vector database using Qdrant
+ def create_vector_store(embedding_model, chunks):
+     embedding_dim = 1536  # dimensionality of text-embedding-3-small
+     client = QdrantClient(":memory:")
+     client.create_collection(
+         collection_name="lcel_doc_v2",
+         vectors_config=VectorParams(size=embedding_dim, distance=Distance.COSINE),
+     )
+     vector_store = QdrantVectorStore(
+         client=client,
+         collection_name="lcel_doc_v2",
+         embedding=embedding_model,
+     )
+     _ = vector_store.add_texts(texts=chunks)
+     return vector_store
+
+ # Create the RAG chain
+ def create_rag(vector_store):
+     # Initialize the OpenAI chat model
+     openai_chat_model = ChatOpenAI(model="gpt-3.5-turbo")
+
+     # Create the chat prompt template
+     chat_prompt = ChatPromptTemplate.from_messages([
+         ("system", "You are a helpful assistant that answers questions based on the provided context."),
+         ("human", HUMAN_TEMPLATE)
+     ])
+
+     # Set up the retriever with a configurable k
+     retriever = vector_store.as_retriever(search_kwargs={"k": 3})
+
+     # Create the RAG pipeline
+     simple_rag = (
+         {"context": retriever, "query": RunnablePassthrough()}
+         | chat_prompt
+         | openai_chat_model
+         | StrOutputParser()
+     )
+
+     return simple_rag
+
+ # Invoke the RAG chain
+ def invoke_rag(vector_store, query):
+     rag_chain = create_rag(vector_store)
+     response = rag_chain.invoke(query)
+     return response
+
+
+ def get_ticket_category(query):
+     # Define the system prompt for categorization
+     CATEGORY_PROMPT = """You are a ticket categorization system. Categorize the following query into exactly one of these categories:
+     - HR Support: For queries about employment, benefits, leaves, workplace policies, etc.
+     - IT Support: For queries about software, hardware, network, system access, etc.
+     - Transportation Support: For queries about company transport, parking, vehicle maintenance, etc.
+     - Other: For queries that do not fit into the above categories.
+     Respond with ONLY the category name, nothing else.
+
+     Query: {query}
+     """
+
+     # Create the OpenAI chat model for categorization
+     client = ChatOpenAI(model="gpt-3.5-turbo")
+
+     # Create the prompt template
+     prompt = ChatPromptTemplate.from_messages([
+         ("system", CATEGORY_PROMPT)
+     ])
+
+     # Create the chain
+     chain = prompt | client | StrOutputParser()
+
+     # Get the category
+     category = chain.invoke({"query": query})
+
+     # Clean and validate the response
+     category = category.strip()
+     valid_categories = ["HR Support", "IT Support", "Transportation Support"]
+
+     if category not in valid_categories:
+         return "Other"  # Default category if classification fails
+
+     return category
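For reference, the full pipeline these helpers implement can be exercised end to end without the Streamlit UI; a minimal sketch, assuming OPENAI_API_KEY is set in the environment (the sample text and question are illustrative only, not part of this commit):

    # Hypothetical smoke test of the helpers above
    from utils._admin_util import (
        create_embeddings, create_vector_store, split_data,
        invoke_rag, get_ticket_category,
    )

    chunks = split_data("Employees accrue 1.5 leave days per month of service.")
    store = create_vector_store(create_embeddings(), chunks)

    question = "How many leave days do I accrue each month?"
    print(invoke_rag(store, question))       # answered from the embedded context
    print(get_ticket_category(question))     # expected: "HR Support"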
uv.lock ADDED
The diff for this file is too large to render. See raw diff