# https://huggingface.co/spaces/manuel-calzolari/assessment3_part2
"""Gradio app: summarize the abstract of an uploaded PDF and read it aloud.

Pipeline: PDF -> raw text -> abstract -> one-line summary -> speech (WAV).
"""

# Import modules
import functools
import logging
import re

import fitz
import scipy
import scipy.io.wavfile  # Load the submodule explicitly; save_speech uses it
import torch
from transformers import pipeline
import gradio as gr

logger = logging.getLogger(__name__)

# Path of the audio file to save
TTS_AUDIO_PATH = "tts.wav"

# Heading that introduces the abstract (alone on its own line)
_ABSTRACT_HEADING_RE = re.compile(r"\n(?:Abstract|ABSTRACT)\n")

# Short standalone line that looks like the next title after the abstract
_NEXT_TITLE_RE = re.compile(r"\n[1A-Z∗][a-zA-Z @:,.{}]{,96}\n")


def _device():
    """Return the device string for the pipelines: first GPU if any, else CPU."""
    return "cuda:0" if torch.cuda.is_available() else "cpu"


@functools.lru_cache(maxsize=1)
def _get_summarizer():
    """Build the summarization pipeline once and reuse it on later calls."""
    return pipeline(
        "summarization",
        model="snrspeaks/t5-one-line-summary",
        device=_device(),
    )


@functools.lru_cache(maxsize=1)
def _get_synthesiser():
    """Build the text-to-speech pipeline once and reuse it on later calls."""
    return pipeline(
        "text-to-speech",
        model="suno/bark-small",
        device=_device(),
    )


def extract_text(pdf_path: str) -> str:
    """
    Function to extract text from PDF.

    Opens the document with PyMuPDF (fitz) and returns the text of all pages
    concatenated in page order.
    """
    with fitz.open(pdf_path) as pdf:
        return "".join(page.get_text() for page in pdf)


def get_abstract(text: str) -> str:
    """
    Function to get the abstract:
    - Remove the text before the abstract
    - Remove the text after the abstract (after the next title)
    - Remove new lines

    This works for some tested PDFs but obviously may not work with every
    possible layout.

    Raises:
        IndexError: if no "Abstract"/"ABSTRACT" heading line is found.
    """
    parts = _ABSTRACT_HEADING_RE.split(text)
    if len(parts) < 2:
        # Same exception type the plain `[1]` lookup would raise, but with an
        # explicit message instead of "list index out of range".
        raise IndexError("no 'Abstract' heading found in the text")

    # Keep only what precedes the next title-like line
    abstract = _NEXT_TITLE_RE.split(parts[1])[0]

    # Re-join words hyphenated across lines, then flatten remaining new lines
    return abstract.replace("-\n", "").replace("\n", " ")


def generate_summary(abstract: str) -> str:
    """
    Function to generate the summary.

    Use a model trained to generate one-line summary based on abstract of
    papers.
    See: https://huggingface.co/snrspeaks/t5-one-line-summary

    The returned summary always ends with a full stop.
    """
    summarizer = _get_summarizer()
    summary = summarizer(
        abstract,
        max_length=64,  # Maximum length of the summary
    )[0]["summary_text"]

    # If it's not already there, add a full stop at the end of the summary
    return summary if summary.endswith(".") else summary + "."


def generate_speech(summary: str):
    """
    Function to generate the speech (TTS model).
    See: https://huggingface.co/suno/bark-small

    Returns the pipeline output; save_speech reads its "audio" and
    "sampling_rate" entries.

    Note 1: I get some PyTorch warnings but it seems to work.
    Note 2: Sometimes (not always) this TTS model adds spurious sounds or
    words at the end (or more rarely at the beginning) of the speech related
    to the text being provided.
    """
    synthesiser = _get_synthesiser()
    return synthesiser(
        summary,
        forward_params={"do_sample": True},  # From the bark-small usage example
    )


def save_speech(speech, audio_path: str) -> None:
    """
    Function to save the speech to a WAV file (from the bark-small usage
    example).

    `speech` must provide "sampling_rate" and "audio" entries.
    """
    scipy.io.wavfile.write(
        audio_path,
        rate=speech["sampling_rate"],
        data=speech["audio"].T,  # Transpose to get shape (n_samples, n_channels)
    )


def synthesis(pdf_path):
    """
    Gradio callback: summarize the abstract of the PDF and synthesize speech.

    Returns a `(summary, audio_path)` pair. When the text or the abstract
    cannot be extracted, returns `("ERROR: ABSTRACT NOT FOUND!!!", None)`.
    """
    try:
        # Extract text from PDF
        text = extract_text(pdf_path)
        # Get the abstract
        abstract = get_abstract(text)
    except Exception:
        # UI boundary: report the failure to the user but keep the traceback
        # in the logs. Unlike a bare `except:`, this lets KeyboardInterrupt
        # and SystemExit propagate.
        logger.exception("Could not extract an abstract from %r", pdf_path)
        return "ERROR: ABSTRACT NOT FOUND!!!", None

    # Generate the summary
    summary = generate_summary(abstract)

    # Generate the speech of the summary
    speech = generate_speech(summary)

    # Save the speech to a file
    save_speech(speech, TTS_AUDIO_PATH)

    return summary, TTS_AUDIO_PATH


# Build and launch the app
summary_tts = gr.Interface(
    fn=synthesis,
    inputs=gr.File(type="filepath", file_types=[".pdf"], label="Upload PDF with abstract"),
    outputs=[gr.Text(label="Summary"), gr.Audio(label="Summary TTS")],
    title="PDF voice abstract summarization",
    description="Upload a PDF with an abstract and this app will summarize the abstract in one sentence and read the summary aloud.",
    examples=[
        "Article 11 Hidden Technical Debt in Machine Learning Systems.pdf",
        "Article 7 Efficient Estimation of Word Representations in Vector Space.pdf",
        "Article 5 A Comprehensive Survey on Applications of Transformers for Deep Learning Tasks.pdf",
    ],
)

summary_tts.launch()