import gradio as gr
from pdf_exctraction import read_pdf
from transformers import pipeline

# Extract the Abstract from the content of the document
def extract_abstract(pdf_path):
    """Return the text that follows the "Abstract" heading of a PDF.

    The document is read with ``read_pdf`` and its pages are visited in the
    order the returned mapping yields them. For each page, the first element
    of the page content is searched for an item equal to ``"Abstract"``
    followed by a newline; the item immediately after that heading is taken
    as the abstract. The first non-empty abstract found is returned, so a
    later page that happens to contain the same heading can no longer
    overwrite (or blank out) an abstract that was already found.

    Args:
        pdf_path: Location of the PDF, passed unchanged to ``read_pdf``.

    Returns:
        The item that follows the first usable "Abstract" heading.

    Raises:
        gr.Error: If no page provides a non-empty abstract.
    """
    heading = "Abstract\n"
    # NOTE(review): assumes read_pdf returns a mapping whose values hold a
    # list of text blocks at index 0 -- confirm against pdf_exctraction.
    for page_content in read_pdf(pdf_path).values():
        blocks = page_content[0]
        if heading not in blocks:
            continue
        following = blocks.index(heading) + 1
        # The heading may be the last block of the page, in which case there
        # is nothing after it to return; keep looking on the next pages.
        if following < len(blocks) and blocks[following] != "":
            return blocks[following]
    raise gr.Error("The article does not contains an Abstract or it is not in the expected format")

# Summarize the abstract extracted from the PDF
def summarize_abstract(pdf_path):
    """Return a short summary of the abstract found in a PDF.

    The abstract is obtained with ``extract_abstract``, flattened to a single
    line of text and passed to a ``facebook/bart-large-cnn`` summarization
    pipeline (beam search with 4 beams, between 10 and 50 tokens).

    Args:
        pdf_path: Location of the PDF, passed unchanged to
            ``extract_abstract``.

    Returns:
        The ``summary_text`` of the first result produced by the pipeline.

    Raises:
        gr.Error: Propagated from ``extract_abstract`` when the document has
            no usable abstract.
    """
    # Extract first so that a document without an abstract fails before the
    # (expensive) model is loaded.
    abstract = _clean_abstract_text(extract_abstract(pdf_path))
    result = _get_summarizer()(
        abstract,
        max_length=50,
        min_length=10,
        length_penalty=2.0,
        num_beams=4,
        early_stopping=True,
    )
    return result[0]['summary_text']


# Summarization pipeline shared by every call; created lazily on first use.
_summarizer = None


def _get_summarizer():
    """Return the shared summarization pipeline, building it on first use."""
    global _summarizer
    if _summarizer is None:
        # Building the pipeline loads the model, so do it only once instead
        # of on every summarization request.
        _summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
    return _summarizer


def _clean_abstract_text(text):
    """Flatten *text* to one line, re-joining words hyphenated at line ends."""
    # Line breaks become spaces first, so a word split as "exam-\nple" turns
    # into "exam- ple" and is glued back together by dropping "- ".
    flattened = text.replace("\n", " ").replace("- ", "")
    # split()/join collapses every run of whitespace, whereas a single
    # replace("  ", " ") pass leaves doubles behind for runs of three or more.
    return " ".join(flattened.split())