# Page residue kept as comments (not code): manuel-calzolari's picture
# Commit message: Upload assessment
# Commit: 1b6419e
# https://huggingface.co/spaces/manuel-calzolari/assessment3_part2
# Import modules
import re
import fitz
import scipy
import torch
from transformers import pipeline
import gradio as gr
# Path of the WAV file the synthesized speech is saved to; synthesis() passes
# this same fixed path to save_speech() and returns it to the UI on every call
TTS_AUDIO_PATH = "tts.wav"
def extract_text(pdf_path):
    """
    Extract the text of every page of a PDF.

    Args:
        pdf_path: Path of the PDF file, handed to ``fitz.open``.

    Returns:
        The text of all pages, concatenated in page order into one string.
    """
    with fitz.open(pdf_path) as pdf:
        # Join once instead of growing a string page by page; the page index
        # is not needed, so iterate the pages directly.
        return "".join(page.get_text() for page in pdf)
def get_abstract(text):
    """
    Isolate the abstract from the full text of a paper.

    Steps:
    - Keep what follows the first line consisting only of "Abstract" or
      "ABSTRACT"
    - Cut at the next line that looks like a title: it starts with "1", an
      uppercase ASCII letter or "∗", and continues with at most 96 characters
      taken from letters, spaces and "@:,.{}"
    - Rejoin words hyphenated across a line break, then turn the remaining
      line breaks into spaces

    This is a heuristic tuned on a few PDFs; other layouts may defeat it.

    Raises:
        IndexError: If the text contains no abstract heading line.
    """
    # Index 1 is the text right after the first heading; it does not exist
    # (IndexError) when the heading is absent.
    after_heading = re.split(r"\n(?:Abstract|ABSTRACT)\n", text)[1]
    # Only the part before the first title-like line is of interest.
    body, *_ = re.split(r"\n[1A-Z∗][a-zA-Z @:,.{}]{,96}\n", after_heading)
    dehyphenated = body.replace("-\n", "")
    return dehyphenated.replace("\n", " ")
# Summarization pipeline shared by all calls; built lazily on first use
_SUMMARIZER = None


def _get_summarizer():
    """Return the shared summarization pipeline, creating it on first use."""
    global _SUMMARIZER
    if _SUMMARIZER is None:
        _SUMMARIZER = pipeline(
            "summarization",
            model="snrspeaks/t5-one-line-summary",
            device="cuda:0" if torch.cuda.is_available() else "cpu",
        )
    return _SUMMARIZER


def generate_summary(abstract):
    """
    Generate a one-line summary of an abstract.

    Uses a model trained to generate one-line summaries from paper abstracts.
    See: https://huggingface.co/snrspeaks/t5-one-line-summary

    The pipeline is created once and reused, so the model is not reloaded on
    every request. The trade-off is that it stays in memory between calls.

    Args:
        abstract: Text of the abstract to summarize.

    Returns:
        The summary text, always ending with a full stop.
    """
    summary = _get_summarizer()(
        abstract,
        max_length=64,  # Maximum length of the summary
    )[0]["summary_text"]
    # If it's not already there, add a full stop at the end of the summary
    return summary if summary.endswith(".") else summary + "."
# Text-to-speech pipeline shared by all calls; built lazily on first use
_SYNTHESISER = None


def _get_synthesiser():
    """Return the shared text-to-speech pipeline, creating it on first use."""
    global _SYNTHESISER
    if _SYNTHESISER is None:
        _SYNTHESISER = pipeline(
            "text-to-speech",
            model="suno/bark-small",
            device="cuda:0" if torch.cuda.is_available() else "cpu",
        )
    return _SYNTHESISER


def generate_speech(summary):
    """
    Generate speech for a text with a TTS model.

    See: https://huggingface.co/suno/bark-small

    Note 1: Some PyTorch warnings are emitted but it seems to work.
    Note 2: Sometimes (not always) this TTS model adds spurious sounds or
    words at the end (or more rarely at the beginning) of the speech related
    to the text being provided.

    The pipeline is created once and reused, so the model is not reloaded on
    every request. The trade-off is that it stays in memory between calls.

    Args:
        summary: Text to read aloud.

    Returns:
        The pipeline output; save_speech() reads its "sampling_rate" and
        "audio" entries.
    """
    return _get_synthesiser()(
        summary,
        forward_params={"do_sample": True},  # From the bark-small usage example
    )
def save_speech(speech, audio_path):
    """
    Save the speech to a WAV file (adapted from the bark-small usage example).

    Args:
        speech: Mapping with a "sampling_rate" entry and an "audio" entry;
            the audio must support ``.T`` (e.g. a NumPy array).
        audio_path: Destination path of the WAV file.
    """
    # Import the submodule explicitly rather than relying on ``scipy.io``
    # being reachable as an attribute after a bare ``import scipy``, which
    # depends on the installed SciPy version.
    from scipy.io import wavfile

    wavfile.write(
        audio_path,
        rate=speech["sampling_rate"],
        data=speech["audio"].T,  # Transpose to get shape (n_samples, n_channels)
    )
def synthesis(pdf_path):
    """
    Run the whole pipeline: PDF -> abstract -> one-line summary -> speech.

    Args:
        pdf_path: Path of the uploaded PDF file.

    Returns:
        A ``(summary, audio_path)`` tuple. When the text cannot be extracted
        or no abstract is found, the first item is an error message and the
        second is ``None``.
    """
    try:
        # Extract text from PDF
        text = extract_text(pdf_path)
        # Get the abstract
        abstract = get_abstract(text)
    except Exception:
        # Still broad on purpose (any extraction or parsing failure is shown
        # to the user as "not found"), but unlike a bare ``except`` it lets
        # KeyboardInterrupt and SystemExit propagate.
        return "ERROR: ABSTRACT NOT FOUND!!!", None
    # Generate the summary
    summary = generate_summary(abstract)
    # Generate the speech of the summary
    speech = generate_speech(summary)
    # Save the speech to a file
    save_speech(speech, TTS_AUDIO_PATH)
    return summary, TTS_AUDIO_PATH
# Describe the UI pieces first, then assemble the interface and serve it
_pdf_input = gr.File(
    type="filepath",
    file_types=[".pdf"],
    label="Upload PDF with abstract",
)
_result_outputs = [
    gr.Text(label="Summary"),
    gr.Audio(label="Summary TTS"),
]
# Sample PDFs offered as examples in the interface
_example_pdfs = [
    "Article 11 Hidden Technical Debt in Machine Learning Systems.pdf",
    "Article 7 Efficient Estimation of Word Representations in Vector Space.pdf",
    "Article 5 A Comprehensive Survey on Applications of Transformers for Deep Learning Tasks.pdf",
]

summary_tts = gr.Interface(
    fn=synthesis,
    inputs=_pdf_input,
    outputs=_result_outputs,
    title="PDF voice abstract summarization",
    description="Upload a PDF with an abstract and this app will summarize the abstract in one sentence and read the summary aloud.",
    examples=_example_pdfs,
)
summary_tts.launch()