imkhan107 committed
Commit f9d300b
1 Parent(s): d139ac8

initial commit

Files changed (3)
  1. app.py +117 -0
  2. app_2.py +145 -0
  3. requirements.txt +5 -0
app.py ADDED
@@ -0,0 +1,117 @@
+ import streamlit as st
+ from base64 import b64encode
+ import os
+ from dotenv import load_dotenv
+ from io import BytesIO
+ from PyPDF2 import PdfReader
+ from langchain_community.document_loaders import PyPDFLoader, UnstructuredPDFLoader, OnlinePDFLoader
+ from tempfile import NamedTemporaryFile
+
+ import google.generativeai as genai
+
+ load_dotenv()  # load all the environment variables
+ # Configure the Gemini API key
+ genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
+
+
+ def prepare_prompt(question, context):
+     """Builds the prompt sent to Gemini from the question and the extracted PDF text."""
+     if context is None:
+         return "Please upload a PDF first."
+
+     prompt = f"""
+     You are an expert in analyzing the given context and providing accurate and comprehensive answers based on it.
+     Use the context provided below to answer the question at the end comprehensively.
+
+     Context: {context}
+
+     Question: {question}
+     """
+
+     return prompt
+
+
+ def get_gemini_response(prompt):
+     """Sends the prompt to the Gemini model and returns the generated text."""
+     print(prompt)
+     model = genai.GenerativeModel('gemini-pro')
+     response = model.generate_content(prompt)
+     return response.text
+
+
+ def extract_text(uploaded_file):
+     """Extracts text from each page of a PDF.
+
+     Args:
+         uploaded_file: An uploaded PDF file object, or a URL string.
+
+     Returns:
+         list: A list containing the extracted text of each page.
+     """
+     pages = []
+     if isinstance(uploaded_file, str):  # Handle URL case
+         print("Fetching URL")
+         loader = OnlinePDFLoader(uploaded_file)
+         pages = [doc.page_content for doc in loader.load()]
+     else:  # Handle uploaded file case
+         pdf_reader = PdfReader(uploaded_file)
+         for page in pdf_reader.pages:
+             pages.append(page.extract_text())
+
+     st.session_state["text"] = pages
+     return pages
+
+
+ st.set_page_config(page_title="Waiwoph App", layout="wide")
+
+ # Interface elements
+ st.title("Talk to your files")
+ st.write("Upload a PDF document and enter your questions.")
+
+ uploaded_file = st.file_uploader("Choose a PDF file:", type="pdf")
+ text = None  # Holds the extracted content
+ convo = ""
+ if st.session_state.get("convo") is not None:
+     convo = st.session_state.get("convo")
+
+ if uploaded_file is not None:
+     text = extract_text(uploaded_file)
+     st.success("PDF uploaded successfully!")
+
+ if text is not None:
+     questions = st.text_input("Ask Your Questions:")
+     answer_button = st.button("Ask", key="find_answers_button")  # Initially enabled
+
+     if answer_button:
+         is_processing = False  # Flag to track processing state
+         for question in questions.splitlines():  # Split questions at line breaks
+             if not is_processing:
+                 is_processing = True
+                 with st.spinner("Processing..."):
+                     prompt = prepare_prompt(question.strip(), text)
+                     response = get_gemini_response(prompt)
+                     convo = f'''{convo} \n\n**User:** {question} \n**Waiwoph:** {response}'''
+                     st.write(convo)
+                     st.session_state["convo"] = convo
+                 is_processing = False  # Reset processing flag after each question
app_2.py ADDED
@@ -0,0 +1,145 @@
+ import streamlit as st
+ from base64 import b64encode
+ import os
+ from dotenv import load_dotenv
+ from io import BytesIO
+ from PyPDF2 import PdfReader
+ from langchain_community.document_loaders import PyPDFLoader, UnstructuredPDFLoader, OnlinePDFLoader
+ from tempfile import NamedTemporaryFile
+
+ import google.generativeai as genai
+
+ load_dotenv()  # load all the environment variables
+ # Configure the Gemini API key
+ genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
+
+
+ def prepare_prompt(question, context):
+     """Builds the prompt sent to Gemini from the question and the extracted PDF text."""
+     prompt = f"""
+     You are an expert in analyzing the given context and providing accurate and comprehensive answers based on it.
+     Use the context provided below to answer the question at the end comprehensively.
+
+     Context: {context}
+
+     Question: {question}
+     """
+
+     return prompt
+
+
+ def get_gemini_response(prompt):
+     """Sends the prompt to the Gemini model and returns the generated text."""
+     print(prompt)
+     model = genai.GenerativeModel('gemini-pro')
+     response = model.generate_content(prompt)
+     return response.text
+
+
+ def extract_text(uploaded_file):
+     """Extracts text from each page of a PDF.
+
+     Args:
+         uploaded_file: An uploaded PDF file object, or a URL string.
+
+     Returns:
+         list: A list containing the extracted text of each page.
+     """
+     pages = []
+     if isinstance(uploaded_file, str):  # Handle URL case
+         print("Fetching URL")
+         loader = OnlinePDFLoader(uploaded_file)
+         pages = [doc.page_content for doc in loader.load()]
+     else:  # Handle uploaded file case
+         pdf_reader = PdfReader(uploaded_file)
+         for page in pdf_reader.pages:
+             pages.append(page.extract_text())
+
+     print(pages)
+     return pages
+
+
+ def display_pdf(pdf_data, col):
+     """Displays the PDF using base64 encoding and an iframe."""
+     if pdf_data is not None:
+         base64_pdf = b64encode(pdf_data).decode('utf-8')
+         pdf_display = f'<iframe src="data:application/pdf;base64,{base64_pdf}" width="100%" height="600px" type="application/pdf"></iframe>'
+         with col:
+             st.markdown(pdf_display, unsafe_allow_html=True)
+
+
+ def main():
+     """Streamlit app with PDF viewer and chat window in a two-row layout."""
+     st.set_page_config(page_title="PDF Viewer with Chat", page_icon="")
+
+     st.title("Upload or Enter a URL to View a PDF and Chat")
+
+     extracted_text = ""
+     # Full page width layout
+     st.markdown("""
+         <style>
+         body { margin: 0; padding: 0; }
+         </style>
+         """, unsafe_allow_html=True)
+
+     # Row 1 for file upload and URL
+     col1_1, col1_2 = st.columns(2)
+     with col1_1:
+         uploaded_file = st.file_uploader("Choose a PDF file", type="pdf")
+     with col1_2:
+         pdf_url = st.text_input("Enter a PDF URL (optional)")
+
+     # Row 2 for PDF viewer and chat
+     col2_1, col2_2 = st.columns([1, 1])  # equal widths: PDF viewer on the left, chat on the right
+
+     # Chat window (right column, hidden initially)
+     with col2_2:
+         chat_container = st.empty()  # Placeholder for chat area
+         chat_history = st.empty()  # Placeholder for chat history (optional)
+
+     def show_chat(extracted_text):
+         chat_container.empty()  # Clear previous chat elements
+         with chat_container:
+             # User input for chat
+             with col2_2:
+                 user_input = st.text_input("Type your query...", key="chat_input")
+
+             # Send button (optional)
+             # You can add a button here to trigger actions based on user input
+             # send_button = st.button("Send")
+
+             # Update chat history
+             if user_input:
+                 # Process the user input and show the answer in the chat area
+                 prompt = prepare_prompt(user_input, extracted_text)
+                 res = get_gemini_response(prompt)
+                 with col2_2:
+                     chat_history.text_area("Chat", f"User: {user_input}\nAnswer: {res}")
+
+     if uploaded_file is not None:
+         pdf_data = uploaded_file.getvalue()
+         # display_pdf(pdf_data, col2_1)
+         extracted_text = extract_text(uploaded_file)
+         show_chat(extracted_text)  # Show chat window only after upload
+     elif pdf_url:
+         try:
+             import requests
+             response = requests.get(pdf_url, stream=True)
+             if response.status_code == 200:
+                 pdf_data = response.content
+                 # display_pdf(pdf_data, col2_1)
+                 extracted_text = extract_text(pdf_url)
+                 show_chat(extracted_text)  # Show chat window only after a successful URL fetch
+             else:
+                 st.error(f"Error: Failed to fetch PDF from URL. Status code: {response.status_code}")
+         except Exception as e:
+             st.error(f"Error: An error occurred while fetching the PDF from the URL: {e}")
+
+
+ if __name__ == '__main__':
+     main()
requirements.txt ADDED
@@ -0,0 +1,5 @@
+ streamlit
+ google-generativeai
+ python-dotenv
+ langchain
+ unstructured