shivam12323 committed on
Commit
b50cb28
·
verified ·
1 Parent(s): 0b28bda

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +69 -0
app.py ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import os
3
+ import streamlit as st
4
+ from PyPDF2 import PdfReader
5
+ from langchain.chat_models import ChatOpenAI
6
+ from langchain.chains.question_answering import load_qa_chain
7
+ from langchain.docstore.document import Document
8
+
9
# ---------------------------------------------------------------------------
# Page setup, credentials and file upload.
#
# Streamlit re-executes this script from the top on every interaction, so the
# guards below call st.stop() to end the current run early until the user has
# supplied both an API key and at least one PDF.
# ---------------------------------------------------------------------------
st.title("📄 Chat with PDFs")
st.sidebar.title("Configuration")

# The key is typed into a masked sidebar field on each session.
openai_api_key = st.sidebar.text_input(
    "Enter your OpenAI API Key:", type="password"
)

if not openai_api_key:
    st.warning("Please enter your OpenAI API Key in the sidebar.")
    st.stop()

# Hand the key directly to the client rather than writing it to os.environ:
# the environment is shared by the whole server process, so storing a user's
# secret there would make it visible to every other session of the app.
llm = ChatOpenAI(
    model="gpt-3.5-turbo",
    temperature=0,
    openai_api_key=openai_api_key,
)

# With accept_multiple_files=True the uploader yields a list, which is empty
# (falsy) until the user has picked at least one file.
uploaded_files = st.file_uploader(
    "Upload one or more PDF files",
    type="pdf",
    accept_multiple_files=True,
)

if not uploaded_files:
    st.stop()
32
def extract_text_from_pdfs(uploaded_files):
    """Return the concatenated text of every page of the uploaded PDFs.

    Args:
        uploaded_files: Iterable of file-like objects, each of which is
            handed to ``PdfReader``.

    Returns:
        One string containing the page texts in file order, then page order,
        joined without a separator. An empty string is returned when there
        are no files or no page yields any text.
    """
    page_texts = []
    for uploaded_file in uploaded_files:
        pdf_reader = PdfReader(uploaded_file)
        # A page without extractable text may yield an empty or missing
        # value; `or ""` keeps the join below from failing on None.
        page_texts.extend(
            page.extract_text() or "" for page in pdf_reader.pages
        )
    # Join once at the end instead of growing a string with repeated `+=`.
    return "".join(page_texts)
40
+
41
def split_text_into_documents(text, chunk_size=1000, overlap=200):
    """Split text into overlapping, fixed-size ``Document`` chunks.

    Chunks start every ``chunk_size - overlap`` characters and are at most
    ``chunk_size`` characters long, so consecutive chunks share ``overlap``
    characters.

    Args:
        text: The string to split.
        chunk_size: Maximum number of characters per chunk; must be positive.
        overlap: Number of characters shared by neighbouring chunks; must be
            non-negative and smaller than ``chunk_size``.

    Returns:
        A list of ``Document`` objects, one per chunk, in text order. The
        list is empty when ``text`` is empty.

    Raises:
        ValueError: If ``chunk_size`` or ``overlap`` would produce a
            non-advancing window or leave gaps in the text.
    """
    if chunk_size <= 0:
        raise ValueError(f"chunk_size must be positive, got {chunk_size}")
    if overlap < 0 or overlap >= chunk_size:
        # overlap == chunk_size gives a zero step, a larger overlap gives a
        # negative step (no chunks at all), and a negative overlap skips
        # characters between chunks. Reject all three up front.
        raise ValueError(
            f"overlap must satisfy 0 <= overlap < chunk_size, "
            f"got overlap={overlap}, chunk_size={chunk_size}"
        )

    step = chunk_size - overlap
    return [
        Document(page_content=text[start:start + chunk_size])
        for start in range(0, len(text), step)
    ]
48
+
49
# ---------------------------------------------------------------------------
# Extract text from the uploaded PDFs, chunk it, and answer questions.
# ---------------------------------------------------------------------------
st.info("Extracting text from PDFs...")
raw_text = extract_text_from_pdfs(uploaded_files)

# If nothing could be extracted (for example, the PDFs contain only images),
# say so and stop instead of reporting success and then querying the model
# with no context at all.
if not raw_text.strip():
    st.error("No extractable text was found in the uploaded PDFs.")
    st.stop()
st.success("Text extracted successfully!")

# Split text into chunks
st.info("Splitting text into smaller chunks...")
documents = split_text_into_documents(raw_text)
st.success(f"Text split into {len(documents)} chunks.")

# Ask questions
st.subheader("Ask questions about your PDFs:")
question = st.text_input("Enter your question:")

if question:
    # The "stuff" chain type is given every chunk as input documents along
    # with the question.
    chain = load_qa_chain(llm, chain_type="stuff")
    st.info("Fetching the answer...")

    answer = chain.run(input_documents=documents, question=question)
    st.success(f"Answer: {answer}")