Spaces:
Sleeping
Sleeping
import gradio as gr | |
from pdf_exctraction import read_pdf | |
from transformers import pipeline | |
# Extract the Abstract from the content of the document | |
def extract_abstract(pdf_path):
    """Return the text block that follows the "Abstract" heading of a PDF.

    The pages returned by ``read_pdf`` are scanned in iteration order and the
    first non-empty block located right after an ``"Abstract\\n"`` block is
    returned.

    NOTE(review): assumes ``read_pdf`` returns a mapping whose values are
    sequences, with item 0 being the list of text blocks of that page —
    confirm against ``pdf_exctraction``.

    Args:
        pdf_path: Path of the PDF file, forwarded unchanged to ``read_pdf``.

    Returns:
        The block that immediately follows the ``"Abstract\\n"`` block.

    Raises:
        gr.Error: If no page has an ``"Abstract\\n"`` block followed by a
            non-empty block.
    """
    heading = "Abstract\n"
    for page_content in read_pdf(pdf_path).values():
        # A page with no extracted content cannot hold the abstract; skip it
        # rather than failing on the [0] lookup below.
        if not page_content:
            continue
        blocks = page_content[0]
        if heading not in blocks:
            continue
        following = blocks.index(heading) + 1
        # The heading may be the last block of the page, or be followed by an
        # empty block; in both cases keep looking on the remaining pages.
        if following < len(blocks) and blocks[following]:
            # Stop at the first hit so that a later page which also contains
            # an "Abstract" block cannot overwrite the one already found.
            return blocks[following]
    raise gr.Error("The article does not contains an Abstract or it is not in the expected format")
# Summarize the abstract extracted from the PDF
# Summarization pipeline shared by every call; created lazily on first use so
# that importing this module does not build it.
_summarizer = None


def _get_summarizer():
    """Return the shared summarization pipeline, building it on first use."""
    global _summarizer
    if _summarizer is None:
        _summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
    return _summarizer


def summarize_abstract(pdf_path):
    """Summarize the abstract of the PDF located at ``pdf_path``.

    The abstract is obtained with ``extract_abstract``, flattened to a single
    line and passed to a ``facebook/bart-large-cnn`` summarization pipeline.

    Args:
        pdf_path: Path of the PDF file, forwarded to ``extract_abstract``.

    Returns:
        The ``summary_text`` of the first result produced by the pipeline.

    Raises:
        gr.Error: Propagated from ``extract_abstract`` when no abstract is
            found.
    """
    abstract = extract_abstract(pdf_path)
    # Join the lines and drop the "- " left behind by words that were
    # hyphenated across a line break ("exam-\nple" -> "example").
    abstract = abstract.replace("\n", " ").replace("- ", "")
    # Collapse every run of whitespace into one space; the previous
    # replace(" ", " ") swapped a space for itself and therefore did nothing.
    abstract = " ".join(abstract.split())
    # Reuse one pipeline instead of constructing a new one for each request.
    summarizer = _get_summarizer()
    result = summarizer(
        abstract,
        max_length=50,
        min_length=10,
        length_penalty=2.0,
        num_beams=4,
        early_stopping=True,
    )
    return result[0]["summary_text"]