from functools import lru_cache

import gradio as gr
from pdf_exctraction import read_pdf
from transformers import pipeline

# Text block that marks the start of the abstract in the extracted content.
_ABSTRACT_HEADING = "Abstract\n"

# Checkpoint used for summarization.
_SUMMARIZATION_MODEL = "facebook/bart-large-cnn"


def extract_abstract(pdf_path):
    """Return the text block that follows the "Abstract" heading of a PDF.

    The document is read with ``read_pdf``. Its result is treated as a
    mapping whose values are indexable; item 0 of each value is searched for
    the exact entry ``"Abstract\\n"`` and the entry right after it is taken
    as the abstract.
    NOTE(review): item 0 is presumably the list of text blocks of one page --
    confirm against ``pdf_exctraction.read_pdf``.

    Args:
        pdf_path: Passed unchanged to ``read_pdf``.

    Returns:
        The first non-empty entry found directly after an "Abstract" heading.

    Raises:
        gr.Error: If no heading followed by a non-empty entry is found.
    """
    for page_content in read_pdf(pdf_path).values():
        blocks = page_content[0]
        if _ABSTRACT_HEADING not in blocks:
            continue
        following = blocks.index(_ABSTRACT_HEADING) + 1
        # The heading may be the very last entry, in which case there is
        # nothing after it here and the search moves on.
        if following < len(blocks) and blocks[following] != "":
            # Stop at the first hit so that a later "Abstract" heading
            # elsewhere in the document cannot overwrite the real abstract.
            return blocks[following]
    raise gr.Error("The article does not contains an Abstract or it is not in the expected format")


@lru_cache(maxsize=1)
def _load_summarizer():
    """Build the summarization pipeline once and reuse it on later calls."""
    return pipeline("summarization", model=_SUMMARIZATION_MODEL)


def _normalize_abstract(text):
    """Flatten an extracted abstract into a single line of text."""
    # Line breaks become spaces first, so a word hyphenated across a line
    # break ("exam-\nple") turns into "exam- ple" and is then re-joined by
    # dropping the "- " sequence.
    flattened = text.replace("\n", " ").replace("- ", "")
    # Collapse the whitespace runs left behind by the replacements above.
    return " ".join(flattened.split())


def summarize_abstract(pdf_path):
    """Summarize the abstract extracted from a PDF.

    Args:
        pdf_path: Passed unchanged to ``extract_abstract``.

    Returns:
        The ``summary_text`` of the first result produced by the
        summarization pipeline.

    Raises:
        gr.Error: Propagated from ``extract_abstract`` when no abstract is
            found.
    """
    abstract = _normalize_abstract(extract_abstract(pdf_path))
    result = _load_summarizer()(
        abstract,
        max_length=50,
        min_length=10,
        length_penalty=2.0,
        num_beams=4,
        early_stopping=True,
    )
    return result[0]["summary_text"]