Priyanka-Balivada committed on
Commit 302157d · verified · 1 Parent(s): 93dc82b

Create app.py

Files changed (1)
  1. app.py +41 -0
app.py ADDED
@@ -0,0 +1,41 @@
+ from pypdf import PdfReader
+ import streamlit as st
+
+ def get_pdf_text(pdf_docs):
+     text = ""
+     for pdf in pdf_docs:
+         pdf_reader = PdfReader(pdf)
+         for page in pdf_reader.pages:
+             text += page.extract_text()
+     return text
+
+ raw_text = ""
+ with st.sidebar:
+     st.title("Menu:")
+     pdf_docs = st.file_uploader(
+         "Upload your PDF Files and Click on the Submit & Process Button", accept_multiple_files=True
+     )
+     if st.button("Submit & Process"):
+         with st.spinner("Processing..."):
+             raw_text = get_pdf_text(pdf_docs)
+
+ st.write(raw_text)
+
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
+
+ # Load the pre-trained tokenizer and model
+ tokenizer = AutoTokenizer.from_pretrained("facebook/bart-large-cnn")
+ model = AutoModelForSeq2SeqLM.from_pretrained("facebook/bart-large-cnn")
+
+ # Tokenize the text
+ inputs = tokenizer(raw_text, return_tensors="pt", max_length=1024, truncation=True)
+
+ # Generate the summary
+ summary_ids = model.generate(inputs["input_ids"], num_beams=4, min_length=30, max_length=200, early_stopping=True)
+
+ # Decode the summary
+ summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
+
+ print("\n\nSummary:\n", summary)
+ print("\n\n\nOriginal text:\n", raw_text)
+
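As committed, the tokenizer and model are loaded and the summary is generated on every Streamlit rerun, even before any PDF has been uploaded, and the result only goes to stdout via print rather than to the page. Below is a minimal sketch of one way the summarization step could be gated and cached instead; it assumes Streamlit's st.cache_resource decorator is available (Streamlit 1.18+), and the load_summarizer helper name is introduced here purely for illustration, not taken from the commit.

import streamlit as st
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

@st.cache_resource
def load_summarizer():
    # Hypothetical helper: cache the tokenizer and model so they load once per
    # session instead of on every rerun of the script.
    tokenizer = AutoTokenizer.from_pretrained("facebook/bart-large-cnn")
    model = AutoModelForSeq2SeqLM.from_pretrained("facebook/bart-large-cnn")
    return tokenizer, model

# raw_text is assumed to come from the upload/extraction step shown in app.py above.
if raw_text:
    tokenizer, model = load_summarizer()
    inputs = tokenizer(raw_text, return_tensors="pt", max_length=1024, truncation=True)
    summary_ids = model.generate(
        inputs["input_ids"], num_beams=4, min_length=30, max_length=200, early_stopping=True
    )
    summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
    st.subheader("Summary")
    st.write(summary)

Guarding on raw_text keeps the app from summarizing an empty string before any files are processed, and rendering with st.write shows the result in the browser rather than the server console.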