Spaces:

kahennefer
/

kh_audioprocessing

Sleeping

File size: 3,132 Bytes


#libraries
import gradio as gr
from PyPDF2 import PdfReader
from tempfile import NamedTemporaryFile
from transformers import pipeline
from bark import SAMPLE_RATE, generate_audio, preload_models
from scipy.io.wavfile import write as write_wav
from IPython.display import Audio
import torch


def _extract_abstract_text(page_text):
    """Return the abstract section of *page_text* joined into one string.

    Collection starts after a line that consists solely of the word
    "abstract" (case-insensitive) and stops at a line containing both "1"
    and "introduction". An empty string is returned when no such heading
    is present.
    """
    abstract_heading = "abstract"
    intro_marker = "introduction"

    collected = []
    in_abstract = False
    for raw_line in page_text.splitlines():
        normalized = raw_line.strip().lower()
        if normalized == abstract_heading:
            # The heading itself is not part of the abstract body.
            in_abstract = True
            continue
        if "1" in normalized and intro_marker in normalized:
            in_abstract = False
            continue
        if in_abstract and normalized:
            collected.append(raw_line.strip())

    # Join with spaces so the last word of one PDF line does not fuse with
    # the first word of the next.
    return " ".join(collected)


def summarize_abstract_from_pdf(pdf_file_path):
    """Summarize the abstract found on the first page of a PDF.

    Only the first page is read. If no stand-alone "Abstract" heading is
    found there, the whole first-page text is summarized instead. The text
    is run through the ``pszemraj/led-base-book-summary`` summarization
    pipeline twice: once with ``max_length=150`` and then again on that
    result with ``max_length=25`` to obtain a very short summary.

    Args:
        pdf_file_path: Path of the PDF file to read.

    Returns:
        The ``summary_text`` of the second summarization pass, as a string.

    Raises:
        ValueError: If the PDF has no pages or its first page yields no
            extractable text.
    """
    # Read the PDF and extract text from the first page
    with open(pdf_file_path, 'rb') as pdf_file:
        reader = PdfReader(pdf_file)
        if len(reader.pages) == 0:
            raise ValueError("The PDF has no pages to summarize.")
        first_page_text = reader.pages[0].extract_text() or ""

    source_text = _extract_abstract_text(first_page_text)
    if not source_text:
        # No stand-alone heading (e.g. "Abstract" set inline with the text):
        # fall back to the whole page rather than summarizing nothing.
        source_text = " ".join(first_page_text.split())
    if not source_text:
        raise ValueError("No extractable text found on the first page of the PDF.")

    # Initialize the summarization pipeline (GPU 0 when available, else CPU)
    summarizer = pipeline(
        "summarization",
        "pszemraj/led-base-book-summary",
        device=0 if torch.cuda.is_available() else -1,
    )

    # Generation settings shared by both passes; only max_length differs.
    generation_options = dict(
        min_length=16,
        no_repeat_ngram_size=3,
        encoder_no_repeat_ngram_size=3,
        repetition_penalty=3.5,
        num_beams=4,
        early_stopping=True,
    )

    # First pass: condense the abstract.
    first_pass = summarizer(source_text, max_length=150, **generation_options)
    # Second pass: summarize the summary to get a much shorter text.
    second_pass = summarizer(
        first_pass[0]['summary_text'],
        max_length=25,
        **generation_options,
    )

    # Return the summarized abstract as a string
    return second_pass[0]['summary_text']

def generate_audio(pdf_file_path):
    """Convert the abstract of an uploaded PDF into a spoken WAV file.

    Args:
        pdf_file_path: The uploaded PDF, given either as a path string or
            as a file-like object that exposes its path through ``.name``.

    Returns:
        Path of a temporary ``.wav`` file containing the generated speech.
        The file is created with ``delete=False``, so it is not removed
        automatically.
    """
    # This function shadows the ``generate_audio`` name imported from bark
    # at the top of the file, so bark's synthesizer is imported here under
    # an alias; calling ``generate_audio`` directly would recurse.
    from bark import generate_audio as bark_generate_audio

    # Download and load the small bark checkpoints. preload_models takes
    # boolean flags rather than a model id.
    preload_models(
        text_use_small=True,
        coarse_use_small=True,
        fine_use_small=True,
    )

    # Access the input file path (accepts a plain path or an object with .name)
    pdf_path = getattr(pdf_file_path, "name", pdf_file_path)

    # Generate audio from the summarized abstract
    text_prompt = summarize_abstract_from_pdf(pdf_path)
    audio_array = bark_generate_audio(text_prompt)

    # Reserve a temporary WAV path, then write after the handle is closed so
    # the file is not opened twice at the same time.
    with NamedTemporaryFile(suffix=".wav", delete=False) as temp_wav_file:
        wav_file_path = temp_wav_file.name
    write_wav(wav_file_path, SAMPLE_RATE, audio_array)

    # Return the path to the saved audio file
    return wav_file_path



# Define app name and app description
app_name = "PDF to Audio Converter"
app_description = "Convert text from a PDF file to audio. Upload a PDF file. We accept only PDF files"

# Create the Gradio app
# Extension filters need the leading dot; a bare "pdf" is neither a known
# file category nor a valid extension filter.
input_component = gr.File(file_types=[".pdf"])
output_component = gr.Audio()

# Wire the PDF upload to the audio generator and start the web UI.
demo = gr.Interface(
    fn=generate_audio,
    inputs=input_component,
    outputs=output_component,
    title=app_name,
    description=app_description,
)
demo.launch()