Spaces:

kahennefer
/

kh_audioprocessing

Sleeping

File size: 3,122 Bytes

cbeccc1
65bdf91
7f7a851
670c797
8efae88
 
7f7a851
accedef
803416f
7d60a60
61ccf5a
8efae88
3cf16c8
8efae88
 
b2e2224
 
8efae88
 
b2e2224
 
 
 
8efae88
 
 
b2e2224
 
 
8efae88
 
 
 
 
b2e2224
8efae88
 
b2e2224
61ccf5a
8efae88
 
b2e2224
8efae88
 
 
 
 
 
 
 
b2e2224
8efae88
 
 
 
 
 
 
 
 
61ccf5a
b2e2224
8efae88
 
b2e2224
 
 
7f7a851
61ccf5a
8efae88
61ccf5a
7f7a851
61ccf5a
8efae88
 
 
 
61ccf5a
8efae88
 
 
 
 
 
 
b2e2224
 
 
 
9ba855d
b2e2224
 
7a8cb88
0f684ad
9ba855d
b2e2224
e4f976c
7a8cb88
9ba855d
b2e2224
4afe3aa
6f0fe07
accedef
8efae88

import gradio as gr
from transformers import pipeline
from tempfile import NamedTemporaryFile
from PyPDF2 import PdfReader
from IPython.display import Audio
import numpy as np
from bark import SAMPLE_RATE, generate_audio, preload_models
from scipy.io.wavfile import write as write_wav
import torch

# Hugging Face model used for both summarization passes.
_SUMMARIZER_MODEL = "pszemraj/led-base-book-summary"

# Generation settings shared by both passes; only max_length differs.
_GENERATION_KWARGS = {
    "min_length": 16,
    "no_repeat_ngram_size": 3,
    "encoder_no_repeat_ngram_size": 3,
    "repetition_penalty": 3.5,
    "num_beams": 4,
    "early_stopping": True,
}

# Holds the lazily built pipeline so the model is loaded once per process
# instead of once per request.
_summarizer_cache = {}


def _get_summarizer():
    """Return the shared summarization pipeline, building it on first use."""
    if "summarizer" not in _summarizer_cache:
        _summarizer_cache["summarizer"] = pipeline(
            "summarization",
            _SUMMARIZER_MODEL,
            device=0 if torch.cuda.is_available() else -1,
        )
    return _summarizer_cache["summarizer"]


def _is_introduction_heading(line):
    """Return True if *line* looks like a "1 Introduction" section heading."""
    normalized = line.strip().lower()
    if not normalized.startswith("1"):
        return False
    # Accept "1 Introduction", "1. Introduction", "1Introduction", ...
    # Anchoring at the start of the line avoids ending the abstract on a body
    # line that merely contains a "1" and the word "introduction".
    return normalized[1:].lstrip(" .:)\t").startswith("introduction")


def _extract_abstract(page_text):
    """Return the text between the "Abstract" and "1 Introduction" headings."""
    collected = []
    in_abstract = False
    for line in page_text.splitlines():
        stripped = line.strip()
        if stripped.lower() == "abstract":
            in_abstract = True
            # Skip the heading itself rather than stripping the word
            # "Abstract" out of the whole text afterwards.
            continue
        if _is_introduction_heading(stripped):
            in_abstract = False
            continue
        if in_abstract and stripped:
            collected.append(stripped)
    # splitlines() drops the newlines, so the lines must be re-joined with a
    # space or the words at line boundaries fuse together.
    return " ".join(collected)


def summarize_abstract_from_pdf(pdf_file_path) -> str:
    """Summarize the abstract found on the first page of a PDF.

    The first page is scanned for a line consisting only of "Abstract"
    (any capitalisation); the following lines, up to a "1 Introduction"
    heading, are summarized in two passes (at most 150 tokens, then at most
    25 tokens). If no abstract heading is found, the whole first page is
    summarized instead.

    Args:
        pdf_file_path: Path of the PDF file to read.

    Returns:
        The short summary produced by the second summarization pass.

    Raises:
        ValueError: If no text can be extracted from the first page.
    """
    # Read the PDF and extract text from the first page only.
    with open(pdf_file_path, "rb") as pdf_file:
        reader = PdfReader(pdf_file)
        page_text = reader.pages[0].extract_text() or ""

    source_text = _extract_abstract(page_text)
    if not source_text:
        # No recognisable abstract section: use the whole page rather than
        # asking the model to summarize an empty string.
        source_text = " ".join(page_text.split())
    if not source_text:
        raise ValueError(
            "No extractable text found on the first page of the PDF."
        )

    summarizer = _get_summarizer()

    # The summary is fed back through the model with a much smaller length
    # budget to get a short text.
    first_pass = summarizer(source_text, max_length=150, **_GENERATION_KWARGS)
    second_pass = summarizer(
        first_pass[0]["summary_text"],
        max_length=25,
        **_GENERATION_KWARGS,
    )

    return second_pass[0]["summary_text"]

def generate_audio_func(pdf_file):
    """Turn the summarized abstract of an uploaded PDF into a WAV file.

    Args:
        pdf_file: The uploaded file as handed over by Gradio; either an
            object exposing the path as ``.name`` or the path itself.

    Returns:
        Path of a temporary ``.wav`` file containing the generated speech.
        The file is not deleted automatically.

    Raises:
        ValueError: If no file was provided.
    """
    if pdf_file is None:
        raise ValueError("No PDF file was provided.")

    # Download and load the bark models.
    # NOTE(review): the string is passed positionally; bark's preload_models
    # does not appear to take a model name, so this probably lands on its
    # first flag parameter. Confirm against the installed bark version.
    model_name = "suno/bark-small"
    preload_models(model_name)

    # Gradio may pass a temp-file wrapper (path in ``.name``) or a plain path.
    pdf_file_path = getattr(pdf_file, "name", pdf_file)

    # Summarize the abstract and synthesize speech from the summary.
    text_prompt = summarize_abstract_from_pdf(pdf_file_path)
    audio_array = generate_audio(text_prompt)

    # Convert to 16-bit PCM; clip first so out-of-range samples saturate
    # instead of wrapping around.
    pcm_samples = (np.clip(audio_array, -1.0, 1.0) * 32767).astype(np.int16)

    # Reserve a temporary file name, then write after the handle is closed so
    # the file is never open twice.
    with NamedTemporaryFile(suffix=".wav", delete=False) as temp_wav_file:
        wav_file_path = temp_wav_file.name

    # Write at bark's own SAMPLE_RATE; a different hard-coded rate plays the
    # audio back at the wrong speed and pitch.
    write_wav(wav_file_path, SAMPLE_RATE, pcm_samples)
    return wav_file_path



# App title and description shown at the top of the Gradio page.
app_name = "PDF to Audio Converter"
app_description = "Convert text from a PDF file to audio. Upload a PDF file. We accept only PDF files"

# Create the Gradio app: one file upload in, one audio player out.
# File extensions need a leading dot in file_types; a bare "pdf" is not
# treated as an extension.
input_component = gr.File(file_types=[".pdf"])
output_component = gr.Audio()

demo = gr.Interface(
    fn=generate_audio_func,
    inputs=input_component,
    outputs=output_component,
    title=app_name,
    description=app_description,
)

demo.launch()