Spaces:

manuel-calzolari
/

assessment3_part2

Runtime error

App Files Files Community

assessment3_part2 / app.py

manuel-calzolari

Upload assessment

1b6419e over 1 year ago

raw

history blame contribute delete

3.93 kB

	# https://huggingface.co/spaces/manuel-calzolari/assessment3_part2

	# Import modules
	import re
	import fitz
	import scipy
	import torch
	from transformers import pipeline
	import gradio as gr

	# Path of the WAV file that the generated speech is saved to; synthesis()
	# passes it to save_speech() and returns it as the audio output.
	TTS_AUDIO_PATH = "tts.wav"


	def extract_text(pdf_path):
	    """
	    Extract the plain text of every page of a PDF.

	    Args:
	        pdf_path: Path of the PDF file, passed to ``fitz.open``.

	    Returns:
	        The text of all pages concatenated in page order, as returned by
	        ``page.get_text()`` for each page.
	    """
	    with fitz.open(pdf_path) as pdf:
	        # The page number is not needed, so iterate the document directly
	        # and join once instead of growing a string page by page.
	        return "".join(page.get_text() for page in pdf)


	def get_abstract(text):
	    """
	    Isolate the abstract in the text extracted from a paper.

	    Steps:
	    - Remove the text before the abstract (up to an "Abstract" or "ABSTRACT"
	      heading that sits alone on its line)
	    - Remove the text after the abstract (from the next title-like line on)
	    - Re-join words hyphenated across lines and replace new lines with spaces

	    This works for some tested PDFs but obviously may not work with every
	    possible layout.

	    Args:
	        text: Full text of the document.

	    Returns:
	        The abstract as a single line of text.

	    Raises:
	        IndexError: If no abstract heading is found in the text.
	    """
	    # The alternation needs a bare "|": an escaped "\|" matches a literal
	    # pipe character, so the heading would never be found.
	    abstract = re.split(r"\n(?:Abstract|ABSTRACT)\n", text)[1]
	    # A "title" is a short line that starts with "1", a capital letter or
	    # "∗" and otherwise holds only letters and a few punctuation characters.
	    abstract = re.split(r"\n[1A-Z∗][a-zA-Z @:,.{}]{,96}\n", abstract)[0]
	    abstract = abstract.replace("-\n", "").replace("\n", " ")
	    return abstract


	def generate_summary(abstract):
	    """
	    Produce a one-line summary of an abstract.

	    Relies on a model trained to generate a one-line summary from the
	    abstract of a paper.
	    See: https://huggingface.co/snrspeaks/t5-one-line-summary
	    """
	    # Run on the first GPU when CUDA is available, otherwise on the CPU
	    target_device = "cuda:0" if torch.cuda.is_available() else "cpu"
	    summarizer = pipeline(
	        "summarization",
	        model="snrspeaks/t5-one-line-summary",
	        device=target_device,
	    )

	    # max_length caps the length of the generated summary
	    results = summarizer(abstract, max_length=64)
	    summary = results[0]["summary_text"]

	    # Make sure the summary ends with a full stop
	    if not summary.endswith("."):
	        summary += "."

	    return summary


	def generate_speech(summary):
	    """
	    Turn the summary into speech with a TTS model.
	    See: https://huggingface.co/suno/bark-small
	    Note 1: Some PyTorch warnings are emitted but it seems to work.
	    Note 2: Sometimes (not always) this TTS model adds spurious sounds or words
	    at the end (or more rarely at the beginning) of the speech related to the
	    text being provided.
	    """
	    use_gpu = torch.cuda.is_available()
	    synthesiser = pipeline(
	        "text-to-speech",
	        model="suno/bark-small",
	        device="cuda:0" if use_gpu else "cpu",
	    )

	    # Sampling is enabled as in the bark-small usage example
	    generation_options = {"do_sample": True}
	    return synthesiser(summary, forward_params=generation_options)


	def save_speech(speech, audio_path):
	    """
	    Save the speech to a WAV file (from the bark-small usage example).

	    Args:
	        speech: Mapping with a "sampling_rate" entry and an "audio" entry
	            holding the samples.
	        audio_path: Path of the WAV file to write.
	    """
	    # Import the submodule explicitly: a bare ``import scipy`` is not
	    # guaranteed to expose ``scipy.io.wavfile`` on every SciPy version.
	    from scipy.io import wavfile

	    wavfile.write(
	        audio_path,
	        rate=speech["sampling_rate"],
	        data=speech["audio"].T,  # Transpose to get shape (n_samples, n_channels)
	    )


	def synthesis(pdf_path):
	    """
	    Summarize the abstract of a PDF in one sentence and read it aloud.

	    Args:
	        pdf_path: Path of the PDF file to process.

	    Returns:
	        A ``(summary, audio_path)`` tuple, or an error message and ``None``
	        when the text or the abstract cannot be extracted.
	    """
	    try:
	        # Extract text from PDF
	        text = extract_text(pdf_path)
	        # Get the abstract
	        abstract = get_abstract(text)
	    except Exception:
	        # Catch ordinary errors only: a bare ``except`` would also swallow
	        # KeyboardInterrupt and SystemExit.
	        return "ERROR: ABSTRACT NOT FOUND!!!", None
	    # Generate the summary
	    summary = generate_summary(abstract)
	    # Generate the speech of the summary
	    speech = generate_speech(summary)
	    # Save the speech to a file
	    save_speech(speech, TTS_AUDIO_PATH)

	    return summary, TTS_AUDIO_PATH


	# Describe the interface: one PDF upload in, summary text and audio out
	pdf_input = gr.File(
	    type="filepath",
	    file_types=[".pdf"],
	    label="Upload PDF with abstract",
	)
	summary_outputs = [gr.Text(label="Summary"), gr.Audio(label="Summary TTS")]
	example_pdfs = [
	    "Article 11 Hidden Technical Debt in Machine Learning Systems.pdf",
	    "Article 7 Efficient Estimation of Word Representations in Vector Space.pdf",
	    "Article 5 A Comprehensive Survey on Applications of Transformers for Deep Learning Tasks.pdf",
	]

	# Build the app around synthesis() and start serving it
	summary_tts = gr.Interface(
	    fn=synthesis,
	    inputs=pdf_input,
	    outputs=summary_outputs,
	    title="PDF voice abstract summarization",
	    description="Upload a PDF with an abstract and this app will summarize the abstract in one sentence and read the summary aloud.",
	    examples=example_pdfs,
	)
	summary_tts.launch()