deepali1021 committed on
Commit
bd37926
·
1 Parent(s): 0c40571

Added files

Browse files
Chatbot.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from dotenv import load_dotenv
2
+ import streamlit as st
3
+ from utils._admin_util import invoke_rag, get_ticket_category
4
+ import os
5
+
6
# Seed the per-session ticket store once: one empty ticket list per department.
if "categories" not in st.session_state:
    st.session_state["categories"] = {
        department: []
        for department in (
            "HR Support",
            "IT Support",
            "Transportation Support",
            "Other",
        )
    }
14
+
15
def main():
    """Render the chat page: answer a question via RAG and optionally file a ticket.

    Flow:
      1. Load environment variables and stop with an error if
         ``OPENAI_API_KEY`` is missing.
      2. Read the user's question; require ``st.session_state.vector_store``
         to have been stored in the session beforehand.
      3. Show the RAG answer, then offer a button that classifies the question
         with ``get_ticket_category`` and appends it to
         ``st.session_state.categories``.
    """
    load_dotenv()

    # Page configuration
    st.set_page_config(
        page_title="Intelligent Customer Support Agent",
        page_icon="🤖",
        layout="wide"
    )

    # Sidebar "About" panel
    with st.sidebar:
        st.markdown("---")
        st.markdown("""
        ### About
        This is an AI-powered customer support agent that can answer questions or raise support ticket about the company policies and procedures:
        - HR policies
        - IT policies
        - Transportation policies
        - Other policies
        """)

    # The helpers read the key from the environment; fail early if it is absent.
    openai_api_key = os.getenv("OPENAI_API_KEY")
    if not openai_api_key:
        st.error("OpenAI API key not found! Please check your .env file.")
        st.stop()

    # Main chat interface
    st.title("🤖 Intelligent Customer Support Agent")
    st.caption("Your 24/7 AI Customer Service Representative")

    st.header("Automatic Ticket Classification Tool")
    # Capture user input
    st.write("We are here to help you, please ask your question:")
    prompt = st.text_input("🔍")

    if prompt:
        if "vector_store" not in st.session_state:
            st.error("Please load the document data first!")
            st.stop()

        vector_store = st.session_state.vector_store

        # Clicking the button below reruns this whole script. Reuse the answer
        # already produced for the same question and the same vector store
        # instead of calling the model a second time (extra cost, and the
        # reply could differ between runs).
        cached = st.session_state.get("last_answer")
        if (
            cached is not None
            and cached["prompt"] == prompt
            and cached["vector_store"] is vector_store
        ):
            response = cached["response"]
        else:
            try:
                response = invoke_rag(vector_store, prompt)
            except Exception as exc:
                # invoke_rag wraps every failure in Exception; show it as a
                # readable message rather than a traceback.
                st.error(f"Could not answer the question: {exc}")
                st.stop()
            st.session_state["last_answer"] = {
                "prompt": prompt,
                "vector_store": vector_store,
                "response": response,
            }
        st.write(response)

        # Button to create a ticket with respective department
        button = st.button("Submit ticket?")

        if button:
            category = get_ticket_category(prompt)
            st.session_state.categories[category].append(prompt)
            st.success("Ticket submitted successfully!")
            # Display category (optional)
            st.write(f"Category: {category}")


if __name__ == '__main__':
    main()
75
+
Dockerfile ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# MIKE DEAN ADAPTATION OF THE CHAINLIT DOCKERFILE USING UV
# December 31, 2024
# Builds the image for the Streamlit app: installs the project with uv and
# serves Chatbot.py on port 7860.

# Get a distribution that has uv already installed
FROM ghcr.io/astral-sh/uv:python3.13-bookworm-slim
# Add user - this is the user that will run the app
# If you do not set user, the app will run as root (undesirable)
RUN useradd -m -u 1000 user
USER user
# Set the home directory and path
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH
# NEW ENV STATEMENT
# NOTE(review): this variable comes from the Chainlit version of this file;
# the CMD below starts Streamlit, so it may be unused here - confirm before removing.
ENV UVICORN_WS_PROTOCOL=websockets
# Set the working directory
WORKDIR $HOME/app
# Copy the app to the container, owned by the non-root user
COPY --chown=user . $HOME/app
# Install the dependencies declared in pyproject.toml
# NOTE(review): --frozen presumably requires an up-to-date uv.lock in the build
# context - confirm the lockfile is committed.
RUN uv sync --frozen
# Expose the port
EXPOSE 7860
# Run the app: Streamlit serving Chatbot.py on all interfaces, port 7860
CMD ["uv", "run", "-m", "streamlit", "run", "Chatbot.py", "--server.address", "0.0.0.0", "--server.port", "7860"]
Documents/HR Policy Manual.docx ADDED
Binary file (16.7 kB). View file
 
Documents/HR Policy Manual.pdf ADDED
Binary file (114 kB). View file
 
Documents/IT Department Policy Manual.docx ADDED
Binary file (17.7 kB). View file
 
Documents/IT Department Policy Manual.pdf ADDED
Binary file (122 kB). View file
 
Documents/Tranportation Policy Manual.docx ADDED
Binary file (16.5 kB). View file
 
Documents/Tranportation Policy Manual.pdf ADDED
Binary file (108 kB). View file
 
pages/Load_Documents.py ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import openai
3
+ from utils._admin_util import create_embeddings, create_vector_store, read_pdf_data, split_data
4
+ import streamlit as st
5
+ from dotenv import load_dotenv
6
+
7
def validate_api_key(api_key):
    """Check an OpenAI API key by issuing one tiny embeddings request.

    Returns True when the request succeeds. On any failure an error message
    is shown with ``st.error`` and False is returned.
    """
    try:
        # Smallest possible live request: embed the single word "test".
        openai.OpenAI(api_key=api_key).embeddings.create(
            input="test", model="text-embedding-ada-002"
        )
    except openai.AuthenticationError:
        failure = "❌ Invalid API key"
    except openai.PermissionDeniedError:
        failure = "❌ Permission denied. Please check your API key's permissions"
    except Exception as e:
        failure = f"❌ API key validation error: {str(e)}"
    else:
        return True

    st.error(failure)
    return False
23
+
24
def main():
    """Render the upload page: turn uploaded PDFs into a session vector store.

    Each uploaded PDF is read and split into chunks; all chunks are embedded
    into one vector store, which is saved as ``st.session_state.vector_store``.
    Any exception raised while processing is reported with ``st.error``.
    """
    load_dotenv()

    # Nothing below can work without a key, so explain the setup and halt.
    if not os.getenv("OPENAI_API_KEY"):
        st.error("❌ OpenAI API key not found! Please ensure it's set in the environment variables.")
        st.info("To set up your API key:")
        st.code("1. Go to Hugging Face Space settings\n2. Add OPENAI_API_KEY in Repository Secrets")
        st.stop()

    st.set_page_config(page_title="Dump PDFs to QDrant - Vector Store")
    st.title("Please upload your files...📁 ")
    try:
        # Upload multiple PDF files
        uploaded_files = st.file_uploader(
            "Upload PDF files",
            type=["pdf"],
            accept_multiple_files=True,
        )

        # Nothing uploaded yet: leave the page showing only the uploader.
        if not uploaded_files:
            return

        with st.spinner('Processing PDF files...'):
            all_chunks = []

            for upload in uploaded_files:
                file_name = upload.name
                st.write(f"Processing: {file_name}")

                # Extract text from this PDF
                raw_text = read_pdf_data(upload)
                st.write(f"👉 Reading {file_name} done")

                # Split it and pool the chunks with those of the other files
                all_chunks.extend(split_data(raw_text))
                st.write(f"👉 Splitting {file_name} into chunks done")

            if not all_chunks:
                st.error("❌ No valid chunks were created from the PDFs")
                st.stop()

            st.write("Creating embeddings...")
            embeddings = create_embeddings()
            st.write("👉 Creating embeddings instance done")

            # Build one store from every chunk and keep it for the session
            st.session_state.vector_store = create_vector_store(embeddings, all_chunks)

            st.success(f"✅ Successfully processed {len(uploaded_files)} files and pushed embeddings to Qdrant")
            st.write(f"Total chunks created: {len(all_chunks)}")

    except Exception as e:
        st.error(f"❌ An unexpected error occurred: {str(e)}")


if __name__ == '__main__':
    main()
pages/Pending_tickets.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+
3
st.title('Departments')

# One tab per department, in display order: (category key, header text).
departments = [
    ('HR Support', 'HR Support tickets'),
    ('IT Support', 'IT Support tickets'),
    ('Transportation Support', 'Transportation Support tickets'),
    ('Other', 'Other tickets'),
]

# This script only reads the ticket store; it does not create it. If it runs
# in a session where "categories" has not been set yet, show empty tabs
# instead of raising.
if "categories" in st.session_state:
    categories = st.session_state.categories
else:
    categories = {}

# Create tabs
tab_titles = [category for category, _ in departments]
tabs = st.tabs(tab_titles)

# Add content to each tab...
for tab, (category, header) in zip(tabs, departments):
    with tab:
        st.header(header)
        # Number tickets by position. Looking the number up with
        # list.index() would give identical tickets the same number.
        for number, ticket in enumerate(categories.get(category, []), start=1):
            st.write(f"{number} : {ticket}")
29
+
pages/__pycache__/_admin_util.cpython-311.pyc ADDED
Binary file (5.67 kB). View file
 
pyproject.toml ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
[project]
name = "midterm"
version = "0.1.0"
description = "intelligent customer support chat"
readme = "README.md"
requires-python = ">=3.12"
dependencies = [
    "pydantic==2.10.1",
    "langchain-core==0.3.31",
    "langchain==0.3.15",
    "langchain-community==0.3.15",
    "langchain-openai==0.3.1",
    "langchain-qdrant==0.2.0",
    "qdrant-client==1.13.2",
    "tiktoken>=0.8.0",
    "pymupdf==1.25.2",
    "langgraph>=0.2.67",
    "langsmith>=0.3.1",
    "openai>=1.58.1",
    "cohere>=5.13.11",
    "lxml>=5.3.0",
    "streamlit>=1.29.0",
    "websockets",
    # Imported directly by the app (`from dotenv import load_dotenv`,
    # `import PyPDF2`), so declare them instead of relying on transitive installs.
    # Regenerate uv.lock after changing this list.
    "python-dotenv>=1.0.1",
    "PyPDF2>=3.0.0",
]
requirements.txt ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# pip requirements for the support-agent app.
# NOTE(review): the Dockerfile in this commit installs with `uv sync`, not from
# this file, and several pins here differ from pyproject.toml (for example
# langchain==0.1.13 here vs langchain==0.3.15 there) - confirm which list is
# authoritative before relying on this file.
langchain==0.1.13
streamlit==1.29.0
openai==1.14.2
tiktoken>=0.6.0
python-dotenv==1.0.1
pinecone-client==3.2.0
pypdf==4.0.1
joblib==1.3.2
pandas==2.2.0
scikit-learn==1.4.0
sentence-transformers==2.5.1
langchain-openai==0.1.0
PyPDF2
pymupdf
langchain-core>=0.1.0
qdrant-client>=1.7.0
langchain-qdrant>=0.1.0
utils/__pycache__/_admin_util.cpython-311.pyc ADDED
Binary file (8.86 kB). View file
 
utils/_admin_util.py ADDED
@@ -0,0 +1,188 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import tiktoken
3
+ import PyPDF2
4
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
5
+ from langchain_openai.embeddings import OpenAIEmbeddings
6
+ from langchain_qdrant import QdrantVectorStore
7
+ from qdrant_client import QdrantClient
8
+ from qdrant_client.http.models import Distance, VectorParams
9
+ from langchain_core.runnables import RunnablePassthrough
10
+ from langchain_core.output_parsers import StrOutputParser
11
+ from langchain_openai import ChatOpenAI
12
+ from langchain_core.prompts import ChatPromptTemplate
13
+ import streamlit as st
14
+
15
+
16
+
17
# Human-message template used by create_rag. The chain fills {context} with the
# retriever output and {query} with the user's question.
HUMAN_TEMPLATE = """
#CONTEXT:
{context}

QUERY:
{query}

Use the provide context to answer the provided user query. Only use the provided context to answer the query. If you do not know the answer, or it's not contained in the provided context response with "I don't know"
"""

# Define the system prompt for categorization
# Used by get_ticket_category, which fills {query} with the ticket text. The
# four category names listed here must stay in sync with the valid_categories
# list in that function.
CATEGORY_PROMPT = """You are a ticket categorization system. Categorize the following query into exactly one of these categories:
- HR Support: For queries about employment, benefits, leaves, workplace policies, etc.
- IT Support: For queries about software, hardware, network, system access, etc.
- Transportation Support: For queries about company transport, parking, vehicle maintenance, etc.
- Other: For queries that do not fit into the above categories.
Respond with ONLY the category name, nothing else.

Query: {query}
"""
37
+
38
def check_api_key():
    """Return the OpenAI API key stored in the ``OPENAI_API_KEY`` variable.

    Only presence is checked (the key is not sent anywhere).

    Raises:
        ValueError: if the variable is unset or empty.
    """
    key = os.environ.get("OPENAI_API_KEY")
    if key:
        return key
    raise ValueError("OpenAI API key not found in environment variables")
44
+
45
+ #Read PDF data
46
def read_pdf_data(pdf_file):
    """Extract the text of every page of a PDF and return it as one string.

    Args:
        pdf_file: anything ``PyPDF2.PdfReader`` accepts (the upload page passes
            the uploaded file object).

    Returns:
        The page texts joined with newlines.

    Raises:
        Exception: "Error reading PDF: ..." for any failure, including a PDF
            that yields no text at all; the original error is attached as the
            cause.
    """
    try:
        pdf_reader = PyPDF2.PdfReader(pdf_file)
        # Join pages with a newline so the last word of one page is not fused
        # to the first word of the next. `or ""` is defensive: a page with no
        # extractable text contributes nothing instead of breaking the join.
        text = "\n".join(
            page.extract_text() or "" for page in pdf_reader.pages
        )
        if not text.strip():
            raise ValueError("No text extracted from PDF")
        return text
    except Exception as e:
        raise Exception(f"Error reading PDF: {str(e)}") from e
57
+
58
def tiktoken_len(text):
    """Return how many tokens ``text`` has under the "gpt-4" tiktoken encoding.

    Any failure is re-raised as Exception("Error in token calculation: ...").
    """
    try:
        encoder = tiktoken.encoding_for_model("gpt-4")
        token_ids = encoder.encode(text)
    except Exception as e:
        raise Exception(f"Error in token calculation: {str(e)}")
    return len(token_ids)
64
+
65
+ #Split data into chunks
66
def split_data(text):
    """Split ``text`` into overlapping chunks and return them as a list.

    Sizes are measured with ``tiktoken_len``, i.e. in tokens rather than
    characters: chunks of at most 500 with an overlap of 50.

    Any failure, including an empty result, is re-raised as
    Exception("Error splitting text: ...").
    """
    try:
        splitter_options = {
            "chunk_size": 500,
            "chunk_overlap": 50,
            "length_function": tiktoken_len,
            # Try paragraph breaks first, then lines, then words, then characters.
            "separators": ["\n\n", "\n", " ", ""],
        }
        pieces = RecursiveCharacterTextSplitter(**splitter_options).split_text(text)
        if pieces:
            return pieces
        raise ValueError("Text splitting produced no chunks")
    except Exception as e:
        raise Exception(f"Error splitting text: {str(e)}")
80
+
81
+ #Create embeddings instance
82
+
83
def create_embeddings():
    """Build the OpenAI embeddings client ("text-embedding-3-small").

    The API key comes from ``check_api_key``. Any failure is re-raised as
    Exception("Error creating embeddings model: ...").
    """
    try:
        return OpenAIEmbeddings(
            openai_api_key=check_api_key(),
            model="text-embedding-3-small",
        )
    except Exception as e:
        raise Exception(f"Error creating embeddings model: {str(e)}")
93
+
94
+
95
+ # Create a vector database using Qdrant
96
def create_vector_store(embedding_model, chunks, embedding_dim=1536,
                        collection_name="lcel_doc_v2"):
    """Create an in-memory Qdrant collection and load ``chunks`` into it.

    Args:
        embedding_model: embeddings object used by the vector store.
        chunks: list of text chunks to add.
        embedding_dim: size of the vectors stored in the collection. It must
            match the output size of ``embedding_model``. The default is the
            value this function previously hard-coded.
        collection_name: name of the Qdrant collection to create and use.

    Returns:
        The populated ``QdrantVectorStore``.

    Raises:
        Exception: "Error in vector store creation: ..." for any failure; the
            underlying error is attached as the cause.
    """
    try:
        client = QdrantClient(":memory:")  # Consider using persistent storage for production

        # Create the collection; cosine distance is used for similarity.
        try:
            client.create_collection(
                collection_name=collection_name,
                vectors_config=VectorParams(size=embedding_dim, distance=Distance.COSINE),
            )
        except Exception as e:
            raise Exception(f"Error creating Qdrant collection: {str(e)}") from e

        vector_store = QdrantVectorStore(
            client=client,
            collection_name=collection_name,
            embedding=embedding_model,
        )

        # Add all chunks to the collection.
        try:
            vector_store.add_texts(texts=chunks)
        except Exception as e:
            raise Exception(f"Error adding texts to vector store: {str(e)}") from e

        return vector_store
    except Exception as e:
        raise Exception(f"Error in vector store creation: {str(e)}") from e
125
+
126
+ # create RAG
127
def _format_context(documents):
    """Join the text of the retrieved documents into one plain-text context block."""
    return "\n\n".join(document.page_content for document in documents)


def create_rag(vector_store):
    """Build the retrieval-augmented chain that answers a question string.

    The chain retrieves the 3 most similar chunks from ``vector_store``, puts
    their text and the question into ``HUMAN_TEMPLATE``, sends the prompt to
    "gpt-3.5-turbo" and returns the reply as a string.

    Args:
        vector_store: store exposing ``as_retriever``.

    Returns:
        A runnable chain; call ``.invoke(question)`` with the question text.

    Raises:
        Exception: "Error creating RAG chain: ..." for any failure; the
            underlying error is attached as the cause.
    """
    try:
        api_key = check_api_key()
        openai_chat_model = ChatOpenAI(
            model="gpt-3.5-turbo",
            openai_api_key=api_key,
            temperature=0.7
        )

        chat_prompt = ChatPromptTemplate.from_messages([
            ("system", "You are a helpful assistant that answers questions based on the provided context."),
            ("human", HUMAN_TEMPLATE)
        ])

        retriever = vector_store.as_retriever(search_kwargs={"k": 3})

        simple_rag = (
            {
                # Pass only the chunk text to the prompt. Without this step the
                # list of Document objects is inserted as its Python repr,
                # metadata included.
                "context": retriever | _format_context,
                "query": RunnablePassthrough(),
            }
            | chat_prompt
            | openai_chat_model
            | StrOutputParser()
        )

        return simple_rag
    except Exception as e:
        raise Exception(f"Error creating RAG chain: {str(e)}") from e
153
+
154
+ # Invoke RAG
155
def invoke_rag(vector_store, query):
    """Answer ``query`` using a RAG chain built over ``vector_store``.

    A new chain is created with ``create_rag`` on every call. Any failure is
    re-raised as Exception("Error invoking RAG chain: ...").
    """
    try:
        return create_rag(vector_store).invoke(query)
    except Exception as e:
        raise Exception(f"Error invoking RAG chain: {str(e)}")
162
+
163
+
164
def get_ticket_category(query):
    """Classify ``query`` into one of the four ticket categories.

    The query is sent to "gpt-3.5-turbo" with ``CATEGORY_PROMPT`` at
    temperature 0, and the reply is mapped onto a known category name.

    Args:
        query: the ticket text to classify.

    Returns:
        One of "HR Support", "IT Support", "Transportation Support" or
        "Other". "Other" is also the fallback when the reply cannot be mapped
        or when any error occurs (the error is shown with ``st.error``).
    """
    valid_categories = ["HR Support", "IT Support", "Transportation Support", "Other"]
    try:
        api_key = check_api_key()
        client = ChatOpenAI(
            model="gpt-3.5-turbo",
            openai_api_key=api_key,
            temperature=0
        )

        prompt = ChatPromptTemplate.from_messages([
            ("system", CATEGORY_PROMPT)
        ])

        chain = prompt | client | StrOutputParser()
        reply = chain.invoke({"query": query})

        # The model is asked for the bare category name, but replies such as
        # "IT Support." or "hr support" still identify a category. Compare
        # case-insensitively after trimming surrounding punctuation.
        cleaned = reply.strip().strip("\"'`.:*- ").casefold()
        for name in valid_categories:
            if cleaned == name.casefold():
                return name

        # Accept a longer reply (e.g. "Category: HR Support") only when it
        # names exactly one department, so an ambiguous reply stays "Other".
        mentioned = [
            name for name in valid_categories
            if name != "Other" and name.casefold() in cleaned
        ]
        if len(mentioned) == 1:
            return mentioned[0]

        return "Other"
    except Exception as e:
        st.error(f"Error in category classification: {str(e)}")
        return "Other"  # Fallback category
187
+
188
+