AkashVD26 committed on
Commit
5ead7e3
·
1 Parent(s): 755a6f7

Final app final version

Browse files
Files changed (2) hide show
  1. app.py +86 -62
  2. trialapp.py → trial.py +62 -86
app.py CHANGED
@@ -6,112 +6,136 @@ from langchain.chains.combine_documents import create_stuff_documents_chain
6
  from langchain_community.vectorstores import FAISS
7
  from langchain_community.chat_message_histories import ChatMessageHistory
8
  from langchain_core.chat_history import BaseChatMessageHistory
9
- from langchain_core.prompts import ChatPromptTemplate,MessagesPlaceholder
10
  from langchain_groq import ChatGroq
11
  from langchain_core.runnables.history import RunnableWithMessageHistory
12
  from langchain_huggingface import HuggingFaceEmbeddings
13
  from langchain_text_splitters import RecursiveCharacterTextSplitter
14
  from langchain_community.document_loaders import PyPDFLoader
15
- from langchain_core.output_parsers import StrOutputParser
16
  import os
17
  from dotenv import load_dotenv
 
 
18
  load_dotenv()
19
 
20
- # API and model setting
21
- os.environ['HF_TOKEN']=os.getenv('HF_TOKEN')
22
- os.environ['GROQ_API_KEY']=os.getenv('GROQ_API_KEY')
23
- embeddings=HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
24
 
25
  # Streamlit app
26
- st.title("𝖯𝖣𝖥𝖲𝖾𝗇𝗌𝖾 : 𝖯𝖣𝖥 𝖰𝗎𝖾𝗌𝗍𝗂𝗈𝗇 𝖺𝗇𝖽 𝖠𝗇𝗌𝗐𝖾𝗋𝗂𝗇𝗀 𝗐𝗂𝗍𝗁 𝗌𝖾𝗌𝗌𝗂𝗈𝗇 𝖼𝗁𝖺𝗍 𝗁𝗂𝗌𝗍𝗈𝗋𝗒")
27
- st.write("upload pdfs and ask questions related to pdfs")
28
- llm=ChatGroq(model="Gemma2-9b-It")
29
- session_id=st.text_input("Session id",value="common_session")
 
30
 
31
- # manage chat history
32
  if 'store' not in st.session_state:
33
- st.session_state.store={}
34
-
35
- # Upload files and documents loading
36
- uploaded_files=st.file_uploader("Drop the pdf files here",type="pdf",accept_multiple_files=True)
 
37
  if uploaded_files:
38
- documents=[]
39
  for uploaded_file in uploaded_files:
40
- temppdf=f"./temp.pdf"
41
- with open(temppdf,"wb") as file:
42
  file.write(uploaded_file.getvalue())
43
- file_name=uploaded_file.name
44
- docs=PyPDFLoader(temppdf).load()
45
  documents.extend(docs)
 
46
  # Delete the temp file as we no longer need it
47
  if os.path.exists("./temp.pdf"):
48
  os.remove("./temp.pdf")
49
- # Text splitting and embedding and storing in chromadb
50
- text_splitter=RecursiveCharacterTextSplitter(chunk_size=5000,chunk_overlap=500)
51
- splits=text_splitter.split_documents(documents)
 
52
  faiss_index = FAISS.from_documents(splits, embeddings)
53
- retriever=faiss_index.as_retriever()
54
 
55
  # Prompts
56
- context_system_prompt=(
57
- "Given a chat history and latest user question"
58
  "which might reference context in the chat history, "
59
- "formulate a standalone question which can be understood "
60
- "without the chat history. Do Not answer the question, "
61
- "just reformulate it if needed and otherwise return it as it is"
62
  )
63
- context_prompt=ChatPromptTemplate.from_messages([
64
- ("system",context_system_prompt),
65
  MessagesPlaceholder("chat_history"),
66
- ("human","{input}")]
67
- )
68
 
69
- history_aware_ret=create_history_aware_retriever(llm,retriever,context_prompt)
70
 
71
- system_prompt=(
72
- "You are 'PDFSense' a PDF reading and answering assistant. "
73
  "Use the following pieces of retrieved context to answer "
74
- "the question. If you don't know the answer, say that you dont know."
75
  "Answer the questions nicely."
76
  "\n\n"
77
  "{context}"
78
  )
79
 
80
- prompt=ChatPromptTemplate.from_messages(
81
- [
82
- ("system",system_prompt),
83
- MessagesPlaceholder("chat_history"),
84
- ("human","{input}")
85
- ]
86
- )
87
  # Chain for the chatbot
88
- qa_chain=create_stuff_documents_chain(llm,prompt)
89
- rag_chain=create_retrieval_chain(history_aware_ret,qa_chain)
90
 
91
- # Session Id storing in chat history
92
- def get_session_history(session:str)-> BaseChatMessageHistory:
93
  if session_id not in st.session_state.store:
94
- st.session_state.store[session_id]=ChatMessageHistory()
95
  return st.session_state.store[session_id]
96
-
97
  # RAG with history
98
- conversation_rag=RunnableWithMessageHistory(
99
  rag_chain,
100
  get_session_history,
101
  input_messages_key="input",
102
  history_messages_key="chat_history",
103
- output_messages_key="answer"
104
- )
105
-
106
- user_input=st.text_input("Enter question")
107
  if user_input:
108
- session_history=get_session_history(session_id)
109
- response=conversation_rag.invoke(
110
- {"input":user_input},
111
  config={
112
- "configurable":{"session_id":session_id}
113
  },
114
  )
115
- st.write(st.session_state.store)
116
- st.write("Assistant:",response['answer'])
117
- st.write("Chat History",session_history.messages)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  from langchain_community.vectorstores import FAISS
7
  from langchain_community.chat_message_histories import ChatMessageHistory
8
  from langchain_core.chat_history import BaseChatMessageHistory
9
+ from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
10
  from langchain_groq import ChatGroq
11
  from langchain_core.runnables.history import RunnableWithMessageHistory
12
  from langchain_huggingface import HuggingFaceEmbeddings
13
  from langchain_text_splitters import RecursiveCharacterTextSplitter
14
  from langchain_community.document_loaders import PyPDFLoader
 
15
  import os
16
  from dotenv import load_dotenv
17
+
18
+ # Load environment variables
19
  load_dotenv()
20
 
21
+ # API and model settings
22
+ os.environ['HF_TOKEN'] = os.getenv('HF_TOKEN')
23
+ os.environ['GROQ_API_KEY'] = os.getenv('GROQ_API_KEY')
24
+ embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
25
 
26
  # Streamlit app
27
+ st.title("𝖯𝖣𝖥𝖲𝖾𝗇𝗌𝖾 : 𝖯𝖣𝖥 𝖰𝗎𝖾𝗌𝗍𝗂𝗈𝗇 𝖺𝗇𝖽 𝖠𝗇𝗌𝗐𝖾𝗋𝗂𝗇𝗀 𝗐𝗂𝗍𝗁 𝖲𝖾𝗌𝗌𝗂𝗈𝗇 𝖢𝗁𝖺𝗍 𝖧𝗂𝗌𝗍𝗈𝗋𝗒")
28
+ st.markdown('####')
29
+ st.write("Upload PDFs and ask questions related to the content of the PDFs.")
30
+ llm = ChatGroq(model="Gemma2-9b-It")
31
+ session_id = st.text_input("Session ID", value="common_session")
32
 
33
+ # Manage chat history
34
  if 'store' not in st.session_state:
35
+ st.session_state.store = {}
36
+ st.markdown('####')
37
+ # Upload files and document loading
38
+ uploaded_files = st.file_uploader("Drop the PDF files here", type="pdf", accept_multiple_files=True)
39
+ st.markdown('####')
40
  if uploaded_files:
41
+ documents = []
42
  for uploaded_file in uploaded_files:
43
+ temppdf = f"./temp.pdf"
44
+ with open(temppdf, "wb") as file:
45
  file.write(uploaded_file.getvalue())
46
+ docs = PyPDFLoader(temppdf).load()
 
47
  documents.extend(docs)
48
+
49
  # Delete the temp file as we no longer need it
50
  if os.path.exists("./temp.pdf"):
51
  os.remove("./temp.pdf")
52
+
53
+ # Text splitting and embedding, storing in FAISS index
54
+ text_splitter = RecursiveCharacterTextSplitter(chunk_size=5000, chunk_overlap=500)
55
+ splits = text_splitter.split_documents(documents)
56
  faiss_index = FAISS.from_documents(splits, embeddings)
57
+ retriever = faiss_index.as_retriever()
58
 
59
  # Prompts
60
+ context_system_prompt = (
61
+ "Given a chat history and the latest user question, "
62
  "which might reference context in the chat history, "
63
+ "formulate a standalone question that can be understood "
64
+ "without the chat history. Do not answer the question, "
65
+ "just reformulate it if needed and otherwise return it as it is."
66
  )
67
+ context_prompt = ChatPromptTemplate.from_messages([
68
+ ("system", context_system_prompt),
69
  MessagesPlaceholder("chat_history"),
70
+ ("human", "{input}")
71
+ ])
72
 
73
+ history_aware_ret = create_history_aware_retriever(llm, retriever, context_prompt)
74
 
75
+ system_prompt = (
76
+ "You are 'PDFSense', a PDF reading and answering assistant. "
77
  "Use the following pieces of retrieved context to answer "
78
+ "the question. If you don't know the answer, say that you don't know. "
79
  "Answer the questions nicely."
80
  "\n\n"
81
  "{context}"
82
  )
83
 
84
+ prompt = ChatPromptTemplate.from_messages([
85
+ ("system", system_prompt),
86
+ MessagesPlaceholder("chat_history"),
87
+ ("human", "{input}")
88
+ ])
89
+
 
90
  # Chain for the chatbot
91
+ qa_chain = create_stuff_documents_chain(llm, prompt)
92
+ rag_chain = create_retrieval_chain(history_aware_ret, qa_chain)
93
 
94
+ # Session ID storing in chat history
95
+ def get_session_history(session: str) -> BaseChatMessageHistory:
96
  if session_id not in st.session_state.store:
97
+ st.session_state.store[session_id] = ChatMessageHistory()
98
  return st.session_state.store[session_id]
99
+
100
  # RAG with history
101
+ conversation_rag = RunnableWithMessageHistory(
102
  rag_chain,
103
  get_session_history,
104
  input_messages_key="input",
105
  history_messages_key="chat_history",
106
+ output_messages_key="answer"
107
+ )
108
+
109
+ user_input = st.text_input("Enter your question")
110
  if user_input:
111
+ session_history = get_session_history(session_id)
112
+ response = conversation_rag.invoke(
113
+ {"input": user_input},
114
  config={
115
+ "configurable": {"session_id": session_id}
116
  },
117
  )
118
+
119
+ st.write("### Response")
120
+ st.success(response['answer'])
121
+ # Display the chat history
122
+ st.write("### Chat History")
123
+ for message in session_history.messages:
124
+ if isinstance(message, dict): # Handle cases where messages might be dictionaries
125
+ role = message.get("role", "user") # Default role is 'user'
126
+ content = message.get("content", "")
127
+ else:
128
+ # For LangChain message objects
129
+ role = "user" if isinstance(message, ChatMessageHistory) else "assistant"
130
+ content = message.content
131
+
132
+ if role == "user":
133
+ with st.chat_message("user"):
134
+ st.success(content)
135
+ elif role == "assistant":
136
+ with st.chat_message("assistant"):
137
+ st.success(content)
138
+ elif role == "system":
139
+ with st.chat_message("system"):
140
+ st.markdown(f"**System Message:** {content}")
141
+ #st.write("Assistant:", response['answer'])
trialapp.py → trial.py RENAMED
@@ -6,136 +6,112 @@ from langchain.chains.combine_documents import create_stuff_documents_chain
6
  from langchain_community.vectorstores import FAISS
7
  from langchain_community.chat_message_histories import ChatMessageHistory
8
  from langchain_core.chat_history import BaseChatMessageHistory
9
- from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
10
  from langchain_groq import ChatGroq
11
  from langchain_core.runnables.history import RunnableWithMessageHistory
12
  from langchain_huggingface import HuggingFaceEmbeddings
13
  from langchain_text_splitters import RecursiveCharacterTextSplitter
14
  from langchain_community.document_loaders import PyPDFLoader
 
15
  import os
16
  from dotenv import load_dotenv
17
-
18
- # Load environment variables
19
  load_dotenv()
20
 
21
- # API and model settings
22
- os.environ['HF_TOKEN'] = os.getenv('HF_TOKEN')
23
- os.environ['GROQ_API_KEY'] = os.getenv('GROQ_API_KEY')
24
- embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
25
 
26
  # Streamlit app
27
- st.title("𝖯𝖣𝖥𝖲𝖾𝗇𝗌𝖾 : 𝖯𝖣𝖥 𝖰𝗎𝖾𝗌𝗍𝗂𝗈𝗇 𝖺𝗇𝖽 𝖠𝗇𝗌𝗐𝖾𝗋𝗂𝗇𝗀 𝗐𝗂𝗍𝗁 𝖲𝖾𝗌𝗌𝗂𝗈𝗇 𝖢𝗁𝖺𝗍 𝖧𝗂𝗌𝗍𝗈𝗋𝗒")
28
- st.markdown('####')
29
- st.write("Upload PDFs and ask questions related to the content of the PDFs.")
30
- llm = ChatGroq(model="Gemma2-9b-It")
31
- session_id = st.text_input("Session ID", value="common_session")
32
 
33
- # Manage chat history
34
  if 'store' not in st.session_state:
35
- st.session_state.store = {}
36
- st.markdown('####')
37
- # Upload files and document loading
38
- uploaded_files = st.file_uploader("Drop the PDF files here", type="pdf", accept_multiple_files=True)
39
- st.markdown('####')
40
  if uploaded_files:
41
- documents = []
42
  for uploaded_file in uploaded_files:
43
- temppdf = f"./temp.pdf"
44
- with open(temppdf, "wb") as file:
45
  file.write(uploaded_file.getvalue())
46
- docs = PyPDFLoader(temppdf).load()
 
47
  documents.extend(docs)
48
-
49
  # Delete the temp file as we no longer need it
50
  if os.path.exists("./temp.pdf"):
51
  os.remove("./temp.pdf")
52
-
53
- # Text splitting and embedding, storing in FAISS index
54
- text_splitter = RecursiveCharacterTextSplitter(chunk_size=5000, chunk_overlap=500)
55
- splits = text_splitter.split_documents(documents)
56
  faiss_index = FAISS.from_documents(splits, embeddings)
57
- retriever = faiss_index.as_retriever()
58
 
59
  # Prompts
60
- context_system_prompt = (
61
- "Given a chat history and the latest user question, "
62
  "which might reference context in the chat history, "
63
- "formulate a standalone question that can be understood "
64
- "without the chat history. Do not answer the question, "
65
- "just reformulate it if needed and otherwise return it as it is."
66
  )
67
- context_prompt = ChatPromptTemplate.from_messages([
68
- ("system", context_system_prompt),
69
  MessagesPlaceholder("chat_history"),
70
- ("human", "{input}")
71
- ])
72
 
73
- history_aware_ret = create_history_aware_retriever(llm, retriever, context_prompt)
74
 
75
- system_prompt = (
76
- "You are 'PDFSense', a PDF reading and answering assistant. "
77
  "Use the following pieces of retrieved context to answer "
78
- "the question. If you don't know the answer, say that you don't know. "
79
  "Answer the questions nicely."
80
  "\n\n"
81
  "{context}"
82
  )
83
 
84
- prompt = ChatPromptTemplate.from_messages([
85
- ("system", system_prompt),
86
- MessagesPlaceholder("chat_history"),
87
- ("human", "{input}")
88
- ])
89
-
 
90
  # Chain for the chatbot
91
- qa_chain = create_stuff_documents_chain(llm, prompt)
92
- rag_chain = create_retrieval_chain(history_aware_ret, qa_chain)
93
 
94
- # Session ID storing in chat history
95
- def get_session_history(session: str) -> BaseChatMessageHistory:
96
  if session_id not in st.session_state.store:
97
- st.session_state.store[session_id] = ChatMessageHistory()
98
  return st.session_state.store[session_id]
99
-
100
  # RAG with history
101
- conversation_rag = RunnableWithMessageHistory(
102
  rag_chain,
103
  get_session_history,
104
  input_messages_key="input",
105
  history_messages_key="chat_history",
106
- output_messages_key="answer"
107
- )
108
-
109
- user_input = st.text_input("Enter your question")
110
  if user_input:
111
- session_history = get_session_history(session_id)
112
- response = conversation_rag.invoke(
113
- {"input": user_input},
114
  config={
115
- "configurable": {"session_id": session_id}
116
  },
117
  )
118
-
119
- st.write("### Response")
120
- st.success(response['answer'])
121
- # Display the chat history
122
- st.write("### Chat History")
123
- for message in session_history.messages:
124
- if isinstance(message, dict): # Handle cases where messages might be dictionaries
125
- role = message.get("role", "user") # Default role is 'user'
126
- content = message.get("content", "")
127
- else:
128
- # For LangChain message objects
129
- role = "user" if isinstance(message, ChatMessageHistory) else "assistant"
130
- content = message.content
131
-
132
- if role == "user":
133
- with st.chat_message("user"):
134
- st.success(content)
135
- elif role == "assistant":
136
- with st.chat_message("assistant"):
137
- st.success(content)
138
- elif role == "system":
139
- with st.chat_message("system"):
140
- st.markdown(f"**System Message:** {content}")
141
- #st.write("Assistant:", response['answer'])
 
6
  from langchain_community.vectorstores import FAISS
7
  from langchain_community.chat_message_histories import ChatMessageHistory
8
  from langchain_core.chat_history import BaseChatMessageHistory
9
+ from langchain_core.prompts import ChatPromptTemplate,MessagesPlaceholder
10
  from langchain_groq import ChatGroq
11
  from langchain_core.runnables.history import RunnableWithMessageHistory
12
  from langchain_huggingface import HuggingFaceEmbeddings
13
  from langchain_text_splitters import RecursiveCharacterTextSplitter
14
  from langchain_community.document_loaders import PyPDFLoader
15
+ from langchain_core.output_parsers import StrOutputParser
16
  import os
17
  from dotenv import load_dotenv
 
 
18
  load_dotenv()
19
 
20
+ # API and model setting
21
+ os.environ['HF_TOKEN']=os.getenv('HF_TOKEN')
22
+ os.environ['GROQ_API_KEY']=os.getenv('GROQ_API_KEY')
23
+ embeddings=HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
24
 
25
  # Streamlit app
26
+ st.title("𝖯𝖣𝖥𝖲𝖾𝗇𝗌𝖾 : 𝖯𝖣𝖥 𝖰𝗎𝖾𝗌𝗍𝗂𝗈𝗇 𝖺𝗇𝖽 𝖠𝗇𝗌𝗐𝖾𝗋𝗂𝗇𝗀 𝗐𝗂𝗍𝗁 𝗌𝖾𝗌𝗌𝗂𝗈𝗇 𝖼𝗁𝖺𝗍 𝗁𝗂𝗌𝗍𝗈𝗋𝗒")
27
+ st.write("upload pdfs and ask questions related to pdfs")
28
+ llm=ChatGroq(model="Gemma2-9b-It")
29
+ session_id=st.text_input("Session id",value="common_session")
 
30
 
31
+ # manage chat history
32
  if 'store' not in st.session_state:
33
+ st.session_state.store={}
34
+
35
+ # Upload files and documents loading
36
+ uploaded_files=st.file_uploader("Drop the pdf files here",type="pdf",accept_multiple_files=True)
 
37
  if uploaded_files:
38
+ documents=[]
39
  for uploaded_file in uploaded_files:
40
+ temppdf=f"./temp.pdf"
41
+ with open(temppdf,"wb") as file:
42
  file.write(uploaded_file.getvalue())
43
+ file_name=uploaded_file.name
44
+ docs=PyPDFLoader(temppdf).load()
45
  documents.extend(docs)
 
46
  # Delete the temp file as we no longer need it
47
  if os.path.exists("./temp.pdf"):
48
  os.remove("./temp.pdf")
49
+ # Text splitting and embedding and storing in chromadb
50
+ text_splitter=RecursiveCharacterTextSplitter(chunk_size=5000,chunk_overlap=500)
51
+ splits=text_splitter.split_documents(documents)
 
52
  faiss_index = FAISS.from_documents(splits, embeddings)
53
+ retriever=faiss_index.as_retriever()
54
 
55
  # Prompts
56
+ context_system_prompt=(
57
+ "Given a chat history and latest user question"
58
  "which might reference context in the chat history, "
59
+ "formulate a standalone question which can be understood "
60
+ "without the chat history. Do Not answer the question, "
61
+ "just reformulate it if needed and otherwise return it as it is"
62
  )
63
+ context_prompt=ChatPromptTemplate.from_messages([
64
+ ("system",context_system_prompt),
65
  MessagesPlaceholder("chat_history"),
66
+ ("human","{input}")]
67
+ )
68
 
69
+ history_aware_ret=create_history_aware_retriever(llm,retriever,context_prompt)
70
 
71
+ system_prompt=(
72
+ "You are 'PDFSense' a PDF reading and answering assistant. "
73
  "Use the following pieces of retrieved context to answer "
74
+ "the question. If you don't know the answer, say that you dont know."
75
  "Answer the questions nicely."
76
  "\n\n"
77
  "{context}"
78
  )
79
 
80
+ prompt=ChatPromptTemplate.from_messages(
81
+ [
82
+ ("system",system_prompt),
83
+ MessagesPlaceholder("chat_history"),
84
+ ("human","{input}")
85
+ ]
86
+ )
87
  # Chain for the chatbot
88
+ qa_chain=create_stuff_documents_chain(llm,prompt)
89
+ rag_chain=create_retrieval_chain(history_aware_ret,qa_chain)
90
 
91
+ # Session Id storing in chat history
92
+ def get_session_history(session:str)-> BaseChatMessageHistory:
93
  if session_id not in st.session_state.store:
94
+ st.session_state.store[session_id]=ChatMessageHistory()
95
  return st.session_state.store[session_id]
96
+
97
  # RAG with history
98
+ conversation_rag=RunnableWithMessageHistory(
99
  rag_chain,
100
  get_session_history,
101
  input_messages_key="input",
102
  history_messages_key="chat_history",
103
+ output_messages_key="answer"
104
+ )
105
+
106
+ user_input=st.text_input("Enter question")
107
  if user_input:
108
+ session_history=get_session_history(session_id)
109
+ response=conversation_rag.invoke(
110
+ {"input":user_input},
111
  config={
112
+ "configurable":{"session_id":session_id}
113
  },
114
  )
115
+ st.write(st.session_state.store)
116
+ st.write("Assistant:",response['answer'])
117
+ st.write("Chat History",session_history.messages)