# NOTE: page banner captured with this file read "Spaces: Runtime error".
# https://huggingface.co/spaces/manuel-calzolari/assessment3_part2 | |
# Import modules | |
import functools
import re

import fitz
import gradio as gr
import scipy
import torch
from transformers import pipeline
# Path of the WAV file the generated speech is saved to.
# synthesis() writes to this same path on every call and returns it to Gradio.
TTS_AUDIO_PATH = "tts.wav"
def extract_text(pdf_path):
    """
    Extract the text of every page of a PDF.

    Args:
        pdf_path: Path of the PDF file; passed unchanged to ``fitz.open``.

    Returns:
        The text of all pages concatenated in page order, with no separator
        inserted between pages.
    """
    # The document is only needed while the pages are being read.
    with fitz.open(pdf_path) as pdf:
        # A single join replaces the repeated "+=" concatenation (and the
        # page counter that was never used).
        return "".join(page.get_text() for page in pdf)
def get_abstract(text):
    """
    Isolate the abstract from the full text of a paper.

    Steps:
    - Keep what follows a line consisting solely of "Abstract" or "ABSTRACT"
    - Cut at the first following line that looks like a section title
    - Re-join words hyphenated across a line break, then turn the remaining
      new lines into spaces
    This is a heuristic that works for some tested PDFs; it is not expected to
    handle every possible layout.

    Raises:
        IndexError: if the text contains no abstract heading line.
    """
    heading = re.compile(r"\n(?:Abstract|ABSTRACT)\n")
    next_title = re.compile(r"\n[1A-Z∗][a-zA-Z @:,.{}]{,96}\n")

    # Element 1 is the text right after the first heading line.
    after_heading = heading.split(text)[1]
    # Only the part before the first title-like line is needed.
    body = next_title.split(after_heading, maxsplit=1)[0]

    dehyphenated = "".join(body.split("-\n"))
    return " ".join(dehyphenated.split("\n"))
@functools.lru_cache(maxsize=1)
def _load_summarizer():
    """Build the summarization pipeline once and reuse it on later calls."""
    return pipeline(
        "summarization",
        model="snrspeaks/t5-one-line-summary",
        device="cuda:0" if torch.cuda.is_available() else "cpu",
    )


def generate_summary(abstract):
    """
    Generate a one-line summary of an abstract.

    Uses a model trained to generate a one-line summary from the abstract of
    a paper. See: https://huggingface.co/snrspeaks/t5-one-line-summary

    Args:
        abstract: Text of the abstract to summarize.

    Returns:
        The summary text, always ending with a full stop.
    """
    # The pipeline is cached so the model is not reloaded on every request.
    summarizer = _load_summarizer()
    summary = summarizer(
        abstract,
        max_length=64,  # Maximum length of the summary
    )[0]["summary_text"]
    # If it's not already there, add a full stop at the end of the summary
    return summary if summary.endswith(".") else summary + "."
@functools.lru_cache(maxsize=1)
def _load_synthesiser():
    """Build the text-to-speech pipeline once and reuse it on later calls."""
    return pipeline(
        "text-to-speech",
        model="suno/bark-small",
        device="cuda:0" if torch.cuda.is_available() else "cpu",
    )


def generate_speech(summary):
    """
    Generate speech for a summary with a TTS model.

    See: https://huggingface.co/suno/bark-small
    Note 1: Some PyTorch warnings were observed by the original author, but
    generation still appeared to work.
    Note 2: Sometimes (not always) this TTS model adds spurious sounds or
    words at the end (or more rarely at the beginning) of the speech related
    to the text being provided.

    Args:
        summary: Text to read aloud.

    Returns:
        The output of the text-to-speech pipeline, returned unchanged.
    """
    # The pipeline is cached so the model is not reloaded on every request.
    synthesiser = _load_synthesiser()
    return synthesiser(
        summary,
        forward_params={"do_sample": True},  # From the bark-small usage example
    )
def save_speech(speech, audio_path):
    """
    Save the speech to a WAV file (from the bark-small usage example).

    Args:
        speech: Mapping with a "sampling_rate" entry and an "audio" array.
        audio_path: Path of the WAV file to write.
    """
    # Import the submodule explicitly: a bare "import scipy" does not
    # guarantee that scipy.io.wavfile is reachable on every scipy version.
    from scipy.io import wavfile

    wavfile.write(
        audio_path,
        rate=speech["sampling_rate"],
        data=speech["audio"].T,  # Transpose to get shape (n_samples, n_channels)
    )
def synthesis(pdf_path):
    """
    Summarize the abstract of a PDF and read the summary aloud.

    Args:
        pdf_path: Path of the uploaded PDF file.

    Returns:
        A ``(summary, audio_path)`` tuple on success. If the text cannot be
        extracted or no abstract is found, returns
        ``("ERROR: ABSTRACT NOT FOUND!!!", None)`` instead.
    """
    try:
        # Extract text from PDF
        text = extract_text(pdf_path)
        # Get the abstract
        abstract = get_abstract(text)
    except Exception:
        # "except Exception" rather than a bare "except" so that
        # KeyboardInterrupt and SystemExit are not swallowed.
        return "ERROR: ABSTRACT NOT FOUND!!!", None
    # Generate the summary
    summary = generate_summary(abstract)
    # Generate the speech of the summary
    speech = generate_speech(summary)
    # Save the speech to a file
    save_speech(speech, TTS_AUDIO_PATH)
    return summary, TTS_AUDIO_PATH
# Assemble the UI pieces first, then build the interface and start the app.
_pdf_input = gr.File(
    type="filepath",
    file_types=[".pdf"],
    label="Upload PDF with abstract",
)
_result_outputs = [
    gr.Text(label="Summary"),
    gr.Audio(label="Summary TTS"),
]
# Sample PDFs offered as examples in the interface.
_example_pdfs = [
    "Article 11 Hidden Technical Debt in Machine Learning Systems.pdf",
    "Article 7 Efficient Estimation of Word Representations in Vector Space.pdf",
    "Article 5 A Comprehensive Survey on Applications of Transformers for Deep Learning Tasks.pdf",
]

summary_tts = gr.Interface(
    fn=synthesis,
    inputs=_pdf_input,
    outputs=_result_outputs,
    title="PDF voice abstract summarization",
    description="Upload a PDF with an abstract and this app will summarize the abstract in one sentence and read the summary aloud.",
    examples=_example_pdfs,
)
summary_tts.launch()