Spaces:
Sleeping
Sleeping
File size: 1,127 Bytes
0e6d852 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 |
from functools import lru_cache

import gradio as gr
from transformers import pipeline

from pdf_exctraction import read_pdf
# Extract the Abstract from the content of the document
def extract_abstract(pdf_path):
    r"""Return the item that follows the "Abstract" heading in a PDF.

    The document is read with ``read_pdf`` and its pages are visited in the
    order the returned mapping yields them. For each page, the first element
    of the page content is searched for an item equal to ``"Abstract\n"``;
    the item immediately after it is taken as the abstract. The first
    non-empty match wins, so a later page that happens to contain the same
    heading cannot overwrite an abstract that was already found.

    Args:
        pdf_path: Location of the PDF, passed unchanged to ``read_pdf``.

    Returns:
        The abstract exactly as stored in the extracted content.

    Raises:
        gr.Error: If no page provides a non-empty item after the heading.
    """
    heading = "Abstract\n"
    for page_content in read_pdf(pdf_path).values():
        # NOTE(review): page_content[0] is presumably the list of text blocks
        # of the page -- confirm against read_pdf.
        blocks = page_content[0]
        try:
            position = blocks.index(heading)
        except ValueError:
            # Heading not present on this page.
            continue
        # The heading may be the last item of the page, in which case there
        # is nothing after it to return.
        if position + 1 < len(blocks) and blocks[position + 1] != "":
            return blocks[position + 1]
    raise gr.Error("The article does not contains an Abstract or it is not in the expected format")
# Summarize the abstract extracted from the PDF
def summarize_abstract(pdf_path):
    """Return a short summary of the abstract found in a PDF.

    The abstract is obtained with ``extract_abstract``, flattened to a single
    line of text and passed to a ``facebook/bart-large-cnn`` summarization
    pipeline.

    Args:
        pdf_path: Location of the PDF, passed unchanged to
            ``extract_abstract``.

    Returns:
        The ``summary_text`` of the first result produced by the pipeline.

    Raises:
        gr.Error: Propagated from ``extract_abstract`` when no abstract is
            found.
    """
    abstract = extract_abstract(pdf_path)
    # Put everything on one line, then drop "- " sequences (presumably to
    # rejoin words that were hyphenated at a line break).
    text = abstract.replace("\n", " ").replace("- ", "")
    # Collapse the whitespace runs left behind by the replacements above.
    text = " ".join(text.split())
    result = _get_summarizer()(
        text,
        max_length=50,
        min_length=10,
        length_penalty=2.0,
        num_beams=4,
        early_stopping=True,
    )
    return result[0]["summary_text"]


@lru_cache(maxsize=1)
def _get_summarizer():
    """Build the summarization pipeline once and reuse it on later calls."""
    return pipeline("summarization", model="facebook/bart-large-cnn")