AkashVD26 committed on
Commit
4cbd953
·
1 Parent(s): 5ead7e3

best UI added for huggingface space

Browse files
Files changed (2) hide show
  1. app.py +40 -91
  2. trial.py +0 -117
app.py CHANGED
@@ -4,11 +4,8 @@ from langchain.chains.history_aware_retriever import create_history_aware_retrie
4
  from langchain.chains.retrieval import create_retrieval_chain
5
  from langchain.chains.combine_documents import create_stuff_documents_chain
6
  from langchain_community.vectorstores import FAISS
7
- from langchain_community.chat_message_histories import ChatMessageHistory
8
- from langchain_core.chat_history import BaseChatMessageHistory
9
  from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
10
  from langchain_groq import ChatGroq
11
- from langchain_core.runnables.history import RunnableWithMessageHistory
12
  from langchain_huggingface import HuggingFaceEmbeddings
13
  from langchain_text_splitters import RecursiveCharacterTextSplitter
14
  from langchain_community.document_loaders import PyPDFLoader
@@ -17,125 +14,77 @@ from dotenv import load_dotenv
17
 
18
  # Load environment variables
19
  load_dotenv()
20
-
21
- # API and model settings
22
  os.environ['HF_TOKEN'] = os.getenv('HF_TOKEN')
23
  os.environ['GROQ_API_KEY'] = os.getenv('GROQ_API_KEY')
24
- embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
25
 
26
- # Streamlit app
27
- st.title("𝖯𝖣𝖥𝖲𝖾𝗇𝗌𝖾 : 𝖯𝖣𝖥 𝖰𝗎𝖾𝗌𝗍𝗂𝗈𝗇 𝖺𝗇𝖽 𝖠𝗇𝗌𝗐𝖾𝗋𝗂𝗇𝗀 𝗐𝗂𝗍𝗁 𝖲𝖾𝗌𝗌𝗂𝗈𝗇 𝖢𝗁𝖺𝗍 𝖧𝗂𝗌𝗍𝗈𝗋𝗒")
28
- st.markdown('####')
29
- st.write("Upload PDFs and ask questions related to the content of the PDFs.")
30
  llm = ChatGroq(model="Gemma2-9b-It")
31
- session_id = st.text_input("Session ID", value="common_session")
 
 
 
 
 
32
 
33
- # Manage chat history
34
- if 'store' not in st.session_state:
35
- st.session_state.store = {}
36
- st.markdown('####')
37
- # Upload files and document loading
38
- uploaded_files = st.file_uploader("Drop the PDF files here", type="pdf", accept_multiple_files=True)
39
- st.markdown('####')
40
  if uploaded_files:
41
  documents = []
42
  for uploaded_file in uploaded_files:
43
- temppdf = f"./temp.pdf"
44
  with open(temppdf, "wb") as file:
45
  file.write(uploaded_file.getvalue())
46
  docs = PyPDFLoader(temppdf).load()
47
  documents.extend(docs)
 
48
 
49
- # Delete the temp file as we no longer need it
50
- if os.path.exists("./temp.pdf"):
51
- os.remove("./temp.pdf")
52
-
53
- # Text splitting and embedding, storing in FAISS index
54
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=5000, chunk_overlap=500)
55
  splits = text_splitter.split_documents(documents)
56
  faiss_index = FAISS.from_documents(splits, embeddings)
57
  retriever = faiss_index.as_retriever()
58
 
59
- # Prompts
60
- context_system_prompt = (
61
- "Given a chat history and the latest user question, "
62
- "which might reference context in the chat history, "
63
- "formulate a standalone question that can be understood "
64
- "without the chat history. Do not answer the question, "
65
- "just reformulate it if needed and otherwise return it as it is."
66
- )
67
  context_prompt = ChatPromptTemplate.from_messages([
68
- ("system", context_system_prompt),
69
  MessagesPlaceholder("chat_history"),
70
  ("human", "{input}")
71
  ])
72
-
73
  history_aware_ret = create_history_aware_retriever(llm, retriever, context_prompt)
74
 
75
  system_prompt = (
76
- "You are 'PDFSense', a PDF reading and answering assistant. "
77
- "Use the following pieces of retrieved context to answer "
78
- "the question. If you don't know the answer, say that you don't know. "
79
- "Answer the questions nicely."
80
- "\n\n"
81
- "{context}"
82
  )
83
-
84
  prompt = ChatPromptTemplate.from_messages([
85
  ("system", system_prompt),
86
  MessagesPlaceholder("chat_history"),
87
  ("human", "{input}")
88
  ])
89
-
90
- # Chain for the chatbot
91
  qa_chain = create_stuff_documents_chain(llm, prompt)
92
  rag_chain = create_retrieval_chain(history_aware_ret, qa_chain)
93
 
94
- # Session ID storing in chat history
95
- def get_session_history(session: str) -> BaseChatMessageHistory:
96
- if session_id not in st.session_state.store:
97
- st.session_state.store[session_id] = ChatMessageHistory()
98
- return st.session_state.store[session_id]
99
-
100
- # RAG with history
101
- conversation_rag = RunnableWithMessageHistory(
102
- rag_chain,
103
- get_session_history,
104
- input_messages_key="input",
105
- history_messages_key="chat_history",
106
- output_messages_key="answer"
107
- )
108
-
109
- user_input = st.text_input("Enter your question")
110
- if user_input:
111
- session_history = get_session_history(session_id)
112
- response = conversation_rag.invoke(
113
- {"input": user_input},
114
- config={
115
- "configurable": {"session_id": session_id}
116
- },
117
- )
118
-
119
- st.write("### Response")
120
- st.success(response['answer'])
121
- # Display the chat history
122
- st.write("### Chat History")
123
- for message in session_history.messages:
124
- if isinstance(message, dict): # Handle cases where messages might be dictionaries
125
- role = message.get("role", "user") # Default role is 'user'
126
- content = message.get("content", "")
127
- else:
128
- # For LangChain message objects
129
- role = "user" if isinstance(message, ChatMessageHistory) else "assistant"
130
- content = message.content
131
-
132
- if role == "user":
133
- with st.chat_message("user"):
134
- st.success(content)
135
- elif role == "assistant":
136
- with st.chat_message("assistant"):
137
- st.success(content)
138
- elif role == "system":
139
- with st.chat_message("system"):
140
- st.markdown(f"**System Message:** {content}")
141
- #st.write("Assistant:", response['answer'])
 
4
  from langchain.chains.retrieval import create_retrieval_chain
5
  from langchain.chains.combine_documents import create_stuff_documents_chain
6
  from langchain_community.vectorstores import FAISS
 
 
7
  from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
8
  from langchain_groq import ChatGroq
 
9
  from langchain_huggingface import HuggingFaceEmbeddings
10
  from langchain_text_splitters import RecursiveCharacterTextSplitter
11
  from langchain_community.document_loaders import PyPDFLoader
 
14
 
15
  # Load environment variables
16
  load_dotenv()
 
 
17
  os.environ['HF_TOKEN'] = os.getenv('HF_TOKEN')
18
  os.environ['GROQ_API_KEY'] = os.getenv('GROQ_API_KEY')
 
19
 
20
+ # Embeddings and LLM initialization
21
+ embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
 
 
22
  llm = ChatGroq(model="Gemma2-9b-It")
23
+ st.set_page_config(page_title="PDFSense", page_icon="📄")
24
+ # Streamlit app title
25
+ st.title("📄 PDFSense: PDF Question Answering with Chat History")
26
+
27
+ # PDF Uploader Section (Keeps it at the top)
28
+ uploaded_files = st.file_uploader("Drop PDF files here", type="pdf", accept_multiple_files=True)
29
 
30
+ # Initialize chat history
31
+ if "messages" not in st.session_state:
32
+ st.session_state["messages"] = [
33
+ {"role": "assistant", "content": "Hi! I am PDFSense. Upload your PDF and ask me anything related to it."}
34
+ ]
35
+
36
+ # Process PDFs if uploaded
37
  if uploaded_files:
38
  documents = []
39
  for uploaded_file in uploaded_files:
40
+ temppdf = "./temp.pdf"
41
  with open(temppdf, "wb") as file:
42
  file.write(uploaded_file.getvalue())
43
  docs = PyPDFLoader(temppdf).load()
44
  documents.extend(docs)
45
+ os.remove("./temp.pdf") # Clean up temporary file
46
 
47
+ # Text splitting and FAISS index creation
 
 
 
 
48
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=5000, chunk_overlap=500)
49
  splits = text_splitter.split_documents(documents)
50
  faiss_index = FAISS.from_documents(splits, embeddings)
51
  retriever = faiss_index.as_retriever()
52
 
53
+ # History-aware retriever and prompt setup
 
 
 
 
 
 
 
54
  context_prompt = ChatPromptTemplate.from_messages([
55
+ ("system", "Refactor the question using chat history for context."),
56
  MessagesPlaceholder("chat_history"),
57
  ("human", "{input}")
58
  ])
 
59
  history_aware_ret = create_history_aware_retriever(llm, retriever, context_prompt)
60
 
61
  system_prompt = (
62
+ "You are PDFSense, a PDF reading assistant. Use the following context to answer the question: "
63
+ "{context}. If unsure, respond with 'I don't know.'"
 
 
 
 
64
  )
 
65
  prompt = ChatPromptTemplate.from_messages([
66
  ("system", system_prompt),
67
  MessagesPlaceholder("chat_history"),
68
  ("human", "{input}")
69
  ])
 
 
70
  qa_chain = create_stuff_documents_chain(llm, prompt)
71
  rag_chain = create_retrieval_chain(history_aware_ret, qa_chain)
72
 
73
+ # Display chat history
74
+ for msg in st.session_state["messages"]:
75
+ st.chat_message(msg["role"]).write(msg["content"])
76
+
77
+ # User input handling
78
+ if user_input := st.chat_input(placeholder="Ask a question about your uploaded PDF..."):
79
+ st.session_state["messages"].append({"role": "user", "content": user_input})
80
+ st.chat_message("user").write(user_input)
81
+
82
+ # Run retrieval and answer generation using invoke()
83
+ with st.chat_message("assistant"):
84
+ chat_history = [{"role": msg["role"], "content": msg["content"]} for msg in st.session_state["messages"]]
85
+ result = rag_chain.invoke({"input": user_input, "chat_history": chat_history})
86
+
87
+ # Extract and display only the answer
88
+ answer = result.get("answer", "I don't know.")
89
+ st.session_state["messages"].append({"role": "assistant", "content": answer})
90
+ st.write(answer)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
trial.py DELETED
@@ -1,117 +0,0 @@
1
- # Importing libraries
2
- import streamlit as st
3
- from langchain.chains.history_aware_retriever import create_history_aware_retriever
4
- from langchain.chains.retrieval import create_retrieval_chain
5
- from langchain.chains.combine_documents import create_stuff_documents_chain
6
- from langchain_community.vectorstores import FAISS
7
- from langchain_community.chat_message_histories import ChatMessageHistory
8
- from langchain_core.chat_history import BaseChatMessageHistory
9
- from langchain_core.prompts import ChatPromptTemplate,MessagesPlaceholder
10
- from langchain_groq import ChatGroq
11
- from langchain_core.runnables.history import RunnableWithMessageHistory
12
- from langchain_huggingface import HuggingFaceEmbeddings
13
- from langchain_text_splitters import RecursiveCharacterTextSplitter
14
- from langchain_community.document_loaders import PyPDFLoader
15
- from langchain_core.output_parsers import StrOutputParser
16
- import os
17
- from dotenv import load_dotenv
18
- load_dotenv()
19
-
20
- # API and model setting
21
- os.environ['HF_TOKEN']=os.getenv('HF_TOKEN')
22
- os.environ['GROQ_API_KEY']=os.getenv('GROQ_API_KEY')
23
- embeddings=HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
24
-
25
- # Streamlit app
26
- st.title("𝖯𝖣𝖥𝖲𝖾𝗇𝗌𝖾 : 𝖯𝖣𝖥 𝖰𝗎𝖾𝗌𝗍𝗂𝗈𝗇 𝖺𝗇𝖽 𝖠𝗇𝗌𝗐𝖾𝗋𝗂𝗇𝗀 𝗐𝗂𝗍𝗁 𝗌𝖾𝗌𝗌𝗂𝗈𝗇 𝖼𝗁𝖺𝗍 𝗁𝗂𝗌𝗍𝗈𝗋𝗒")
27
- st.write("upload pdfs and ask questions related to pdfs")
28
- llm=ChatGroq(model="Gemma2-9b-It")
29
- session_id=st.text_input("Session id",value="common_session")
30
-
31
- # manage chat history
32
- if 'store' not in st.session_state:
33
- st.session_state.store={}
34
-
35
- # Upload files and documents loading
36
- uploaded_files=st.file_uploader("Drop the pdf files here",type="pdf",accept_multiple_files=True)
37
- if uploaded_files:
38
- documents=[]
39
- for uploaded_file in uploaded_files:
40
- temppdf=f"./temp.pdf"
41
- with open(temppdf,"wb") as file:
42
- file.write(uploaded_file.getvalue())
43
- file_name=uploaded_file.name
44
- docs=PyPDFLoader(temppdf).load()
45
- documents.extend(docs)
46
- # Delete the temp file as we no longer need it
47
- if os.path.exists("./temp.pdf"):
48
- os.remove("./temp.pdf")
49
- # Text splitting and embedding and storing in chromadb
50
- text_splitter=RecursiveCharacterTextSplitter(chunk_size=5000,chunk_overlap=500)
51
- splits=text_splitter.split_documents(documents)
52
- faiss_index = FAISS.from_documents(splits, embeddings)
53
- retriever=faiss_index.as_retriever()
54
-
55
- # Prompts
56
- context_system_prompt=(
57
- "Given a chat history and latest user question"
58
- "which might reference context in the chat history, "
59
- "formulate a standalone question which can be understood "
60
- "without the chat history. Do Not answer the question, "
61
- "just reformulate it if needed and otherwise return it as it is"
62
- )
63
- context_prompt=ChatPromptTemplate.from_messages([
64
- ("system",context_system_prompt),
65
- MessagesPlaceholder("chat_history"),
66
- ("human","{input}")]
67
- )
68
-
69
- history_aware_ret=create_history_aware_retriever(llm,retriever,context_prompt)
70
-
71
- system_prompt=(
72
- "You are 'PDFSense' a PDF reading and answering assistant. "
73
- "Use the following pieces of retrieved context to answer "
74
- "the question. If you don't know the answer, say that you dont know."
75
- "Answer the questions nicely."
76
- "\n\n"
77
- "{context}"
78
- )
79
-
80
- prompt=ChatPromptTemplate.from_messages(
81
- [
82
- ("system",system_prompt),
83
- MessagesPlaceholder("chat_history"),
84
- ("human","{input}")
85
- ]
86
- )
87
- # Chain for the chatbot
88
- qa_chain=create_stuff_documents_chain(llm,prompt)
89
- rag_chain=create_retrieval_chain(history_aware_ret,qa_chain)
90
-
91
- # Session Id storing in chat history
92
- def get_session_history(session:str)-> BaseChatMessageHistory:
93
- if session_id not in st.session_state.store:
94
- st.session_state.store[session_id]=ChatMessageHistory()
95
- return st.session_state.store[session_id]
96
-
97
- # RAG with history
98
- conversation_rag=RunnableWithMessageHistory(
99
- rag_chain,
100
- get_session_history,
101
- input_messages_key="input",
102
- history_messages_key="chat_history",
103
- output_messages_key="answer"
104
- )
105
-
106
- user_input=st.text_input("Enter question")
107
- if user_input:
108
- session_history=get_session_history(session_id)
109
- response=conversation_rag.invoke(
110
- {"input":user_input},
111
- config={
112
- "configurable":{"session_id":session_id}
113
- },
114
- )
115
- st.write(st.session_state.store)
116
- st.write("Assistant:",response['answer'])
117
- st.write("Chat History",session_history.messages)