Spaces:

ndhananj
/

pdf2podcast

Sleeping

App Files Files Community

pdf2podcast / app.py

ndhananj

Create app.py

9c65d04 verified 7 months ago

raw

history blame contribute delete

7.71 kB

	import os
	from openai import OpenAI
	import tiktoken

	import nltk
	nltk.download('punkt')
	from nltk.tokenize import sent_tokenize

	from pypdf import PdfReader, PageRange

	## Function to read the uploaded PDF
	def read_data_from_PDF(input_path):
	    """Extract the text of every page of a PDF into one string.

	    Args:
	        input_path: Location of the PDF; handed unchanged to ``PdfReader``.

	    Returns:
	        The extracted text of each page, in page order, with a newline
	        appended after every page.
	    """
	    print('Reading PDF from path', input_path)
	    pdf = PdfReader(input_path)
	    page_count = len(pdf.pages)
	    print('PDF has been read with ', page_count, ' pages')
	    # One pass over the pages; each page's text is terminated by a newline.
	    return "".join(page.extract_text() + "\n" for page in pdf.pages)


	## Function to split the text into sentences
	def split_text(input_text):
	    """Break ``input_text`` into sentences.

	    Thin wrapper around NLTK's ``sent_tokenize``; returns whatever that
	    call returns for the given text.
	    """
	    return sent_tokenize(input_text)


	## Function to create chunks while considering sentences
	def create_chunks(split_sents, max_token_len=50):
	    """Greedily pack sentences into chunks of roughly ``max_token_len`` tokens.

	    Sentences are never split: a sentence is added to the current chunk
	    until adding the next one would exceed the budget, at which point the
	    chunk is closed and a new one is started. A single sentence longer than
	    the budget becomes a chunk of its own.

	    Args:
	        split_sents: Iterable of sentence strings, in document order.
	        max_token_len: Token budget per chunk, measured with the
	            ``gpt-4o-mini`` tiktoken encoding.

	    Returns:
	        List of non-empty chunk strings; sentences within a chunk are
	        separated by a single space. The budget is applied to the sum of
	        per-sentence token counts, so the joined chunk may differ from it
	        by a few tokens.
	    """
	    enc = tiktoken.encoding_for_model("gpt-4o-mini")
	    input_chunks = []
	    current_sents = []
	    current_token_len = 0
	    for sent in split_sents:
	        if not sent.strip():
	            # Nothing to summarise in a blank sentence.
	            continue
	        # disallowed_special=() makes text such as "<|endoftext|>" encode as
	        # ordinary characters instead of raising ValueError.
	        sent_token_len = len(enc.encode(sent, disallowed_special=()))
	        # Only flush when something has been collected; otherwise an
	        # oversized first sentence would emit an empty chunk.
	        if current_sents and (current_token_len + sent_token_len) > max_token_len:
	            input_chunks.append(" ".join(current_sents))
	            current_sents = []
	            current_token_len = 0
	        current_sents.append(sent)
	        current_token_len += sent_token_len
	    if current_sents:
	        input_chunks.append(" ".join(current_sents))
	    return input_chunks


	## Function to create chunks
	def create_input_chunks(input_text):
	    """Split raw text into sentence-aligned chunks of at most 3000 tokens.

	    Runs ``split_text`` on the input and feeds the resulting sentences to
	    ``create_chunks`` with ``max_token_len=3000``.
	    """
	    return create_chunks(split_text(input_text), max_token_len=3000)


	## Function to create summary of the given input text
	def create_summary_points(input_chunks):
	    """Ask the chat model for bullet-point highlights of every chunk.

	    Args:
	        input_chunks: Iterable of text chunks to summarise, one request
	            per chunk.

	    Returns:
	        The model's answers for all chunks, joined with newlines.
	    """
	    # The API key comes from the OPENAI_API_KEY environment variable.
	    # The earlier `userdata.get(...)` is a Google Colab helper that this
	    # file never imports, so it raised NameError on the first call.
	    client = OpenAI(api_key=os.environ.get('OPENAI_API_KEY'))
	    instructPrompt = """
	You are provided with a piece of text regarding a subject. Please identify the key facts and highlights about this piece of text.
	Please provide the output in the form of a list something like -

	- Highlight 1
	- Highlight 2
	- Highlight 3
	"""
	    podcastFacts = []
	    for text in input_chunks:
	        if not text.strip():
	            # Skip blank chunks instead of paying for an empty request.
	            continue
	        request = instructPrompt + '\n' + text
	        chatOutput = client.chat.completions.create(
	            model="gpt-4o-mini",
	            messages=[
	                {"role": "system", "content": "You are a helpful assistant."},
	                {"role": "user", "content": request},
	            ],
	        )
	        podcastFacts.append(chatOutput.choices[0].message.content)
	    return "\n".join(podcastFacts)


	## Prompt templates for the podcast conversation, one per output style.
	## Each template is prepended to the extracted highlights before the request is sent.

	## Debate style: "Sid" argues in favour of the findings, "Darren" takes the critical side.
	## NOTE(review): unlike the casual prompt, this one does not ask the model to prefix lines
	## with "Sid:" / "Darren:", which create_podcast_audio strips from each paragraph - confirm
	## the generated scripts still carry those labels.
	debate_podcast_prompt = """
	Could you simulate a podcast conversation in a debate-style between two experts, \"Sid\" and \"Darren\", discussing the following key points extracted from a research paper?
	Some things that you need to keep in mind while creating the conversation:
	- In the debate, Sid takes a stance that has a positive view of the findings and supports the implications and findings represented by these key points. They provide their reasoning and analogical examples to back up their interpretations.
	- Conversely, Darren adopts a more critical or alternative viewpoint. They question some of the findings by discussing potential drawbacks, limitations, or different outcomes.
	- The conversation should see both experts engaging with each key point, presenting their views, challenging each other's interpretations, and discussing the broader implications of their arguments.
	- The debate should be balanced, allowing each expert to articulate their perspective comprehensively.
	- Conclude the conversation with each expert summarizing their overall position on the topic.
	Here's some of the facts from the topic.
	"""

	## Casual style: Sid presents the facts, Darren asks questions; lines are labelled "Sid:" / "Darren:".
	casual_podcast_prompt = """
	Could you simulate a podcast conversation between \"Sid\" and \"Darren\" having a conversation about the following facts?
	Some things I'd like to ask:
	- Use \"Sid:\" and \"Darren:\" to indicate who is speaking.
	- Start the dialog with a casual discussion on what each person is drinking right now.
	- Make the dialog about this as long as possible and make it sound funny
	- Sid is the one presenting the information, Darren is asking intelligent questions that help Sid elaborate the facts.
	Here's some of the facts from the topic.
	"""

	## Style name -> prompt template. The keys populate the style dropdown and are the
	## values accepted as `output_style` by create_podcast_script.
	styles = {'casual':casual_podcast_prompt,
	'debate': debate_podcast_prompt}


	## Function to create the podcast script
	def create_podcast_script(podcast_points, output_style):
	    """Turn summarised highlights into a two-speaker podcast script.

	    Args:
	        podcast_points: Highlights text; appended to the style prompt.
	        output_style: Key of the module-level ``styles`` dict selecting
	            which prompt template to use.

	    Returns:
	        The content of the first choice of the chat completion.

	    Raises:
	        KeyError: If ``output_style`` is not a key of ``styles`` (for
	            example ``None`` when no style was selected).
	    """
	    # Validate the style before touching the API so a bad selection fails
	    # fast with a message that lists the valid options.
	    try:
	        instructPrompt = styles[output_style]
	    except KeyError:
	        raise KeyError(
	            f"Unknown podcast style {output_style!r}; expected one of {list(styles)}"
	        ) from None
	    # The API key comes from the OPENAI_API_KEY environment variable.
	    # The earlier `userdata.get(...)` is a Google Colab helper that this
	    # file never imports, so it raised NameError here.
	    client = OpenAI(api_key=os.environ.get('OPENAI_API_KEY'))
	    request = instructPrompt + '\n' + podcast_points
	    chatOutput = client.chat.completions.create(
	        model="gpt-4o-mini",
	        messages=[
	            {"role": "system", "content": "You are a helpful assistant."},
	            {"role": "user", "content": request},
	        ],
	    )
	    return chatOutput.choices[0].message.content


	## Function to call all the podcast script generation steps
	def create_podcast(input_path, output_style):
	    """Run the full PDF-to-script pipeline.

	    Steps, in order: read the PDF text, split it into chunks, summarise
	    the chunks into highlights, then write the script in ``output_style``.

	    Returns:
	        The script returned by ``create_podcast_script``.
	    """
	    pdf_text = read_data_from_PDF(input_path)
	    highlights = create_summary_points(create_input_chunks(pdf_text))
	    return create_podcast_script(highlights, output_style)


	## Function to generate speech from input text
	def openai_generation(input_text, speaker_voice, model_choice="tts-1"):
	    """Synthesise ``input_text`` with the OpenAI speech endpoint.

	    Args:
	        input_text: Text to be spoken.
	        speaker_voice: Voice name passed as ``voice`` to the API.
	        model_choice: Speech model name passed as ``model``.

	    Returns:
	        Whatever ``read()`` on the API response yields (the audio payload
	        that the caller writes to the output file).
	    """
	    # The API key comes from the OPENAI_API_KEY environment variable.
	    # The earlier `userdata.get(...)` is a Google Colab helper that this
	    # file never imports, so it raised NameError here.
	    client = OpenAI(api_key=os.environ.get('OPENAI_API_KEY'))
	    response = client.audio.speech.create(
	        model=model_choice,
	        voice=speaker_voice,
	        input=input_text,
	    )
	    return response.read()


	## Function to generate complete audio podcast from script
	## NOTE: this function assumes that there are only two speakers; please modify if you have multiple speakers in the script
	def create_podcast_audio(podcastScript, speakerName1="Sid", speakerChoice1='onyx', speakerName2="Darren", speakerChoice2='echo'):
	    """Render a two-speaker script to ``genPodcast.mp3``.

	    The script is split into paragraphs on blank lines. For each non-empty
	    paragraph the voice is chosen from the speaker label it contains
	    (``"<name>:"``; the earliest label wins). Paragraphs without any label
	    alternate voices, starting with speaker 1, which is how every
	    paragraph used to be handled. Choosing by label keeps the voices
	    correct when the script has a title line or one speaker talks twice
	    in a row.

	    Args:
	        podcastScript: Script text with paragraphs separated by blank lines.
	        speakerName1: Name of the first speaker as used in the script.
	        speakerChoice1: Voice used for the first speaker.
	        speakerName2: Name of the second speaker as used in the script.
	        speakerChoice2: Voice used for the second speaker.

	    Returns:
	        The path of the written audio file, ``"genPodcast.mp3"``.

	    Raises:
	        ValueError: If the script contains no text to speak.
	    """
	    label1 = speakerName1 + ":"
	    label2 = speakerName2 + ":"
	    genPodcast = []
	    # Flipped before first use, so an unlabeled script starts with speaker 1.
	    use_first_voice = False
	    for paragraph in podcastScript.split('\n\n'):
	        if not paragraph.strip():
	            # Blank paragraphs have nothing to synthesise.
	            continue
	        pos1 = paragraph.find(label1)
	        pos2 = paragraph.find(label2)
	        if pos1 != -1 and (pos2 == -1 or pos1 <= pos2):
	            use_first_voice = True
	        elif pos2 != -1:
	            use_first_voice = False
	        else:
	            # No label at all: fall back to simple alternation.
	            use_first_voice = not use_first_voice

	        if use_first_voice:
	            speakerChoice = speakerChoice1
	            spoken = paragraph.replace(label1, '')
	        else:
	            speakerChoice = speakerChoice2
	            spoken = paragraph.replace(label2, '')
	        spoken = spoken.strip()
	        if not spoken:
	            # The paragraph held only a speaker label.
	            continue
	        genPodcast.append(
	            openai_generation(input_text=spoken, speaker_voice=speakerChoice, model_choice="tts-1")
	        )

	    if not genPodcast:
	        raise ValueError("The podcast script is empty; there is nothing to convert to audio.")

	    # The per-paragraph audio payloads are written back to back into one file.
	    with open("genPodcast.mp3", "wb") as f:
	        for pod in genPodcast:
	            f.write(pod)
	    return "genPodcast.mp3"

	import gradio as gr

	def upload_file(file):
	    """Return the ``name`` attribute of the uploaded file object.

	    Registered as the upload-button callback; the returned value is what
	    gets placed into the file component.
	    """
	    uploaded_name = file.name
	    return uploaded_name

	with gr.Blocks() as demo:
	    # Step 1: upload a PDF. The button's callback copies the uploaded
	    # file's name into the file component, which later feeds the pipeline.
	    file_output = gr.File()
	    upload_button = gr.UploadButton("Click to Upload a PDF", file_types=[".pdf"], file_count="single")
	    upload_button.upload(upload_file, upload_button, file_output)

	    # Step 2: pick a style and generate the script. The choices are passed
	    # as a real list rather than a dict view, matching the documented
	    # `choices` type.
	    podcast_style = gr.Dropdown(list(styles.keys()), label="podcast_style")
	    generate_podcast_button = gr.Button("Generate Podcast Script")
	    # Editable, so the script can be adjusted before audio generation.
	    podcast_script = gr.Textbox(interactive=True, label="podcast_script")

	    generate_podcast_button.click(fn=create_podcast, inputs=[file_output, podcast_style], outputs=podcast_script, api_name="generate_podcast_script")

	    # Step 3: turn the (possibly edited) script into audio.
	    generate_audio_button = gr.Button("Generate Audio Version")
	    podcast_audio = gr.Audio(label="podcast_audio", interactive=False, type="filepath")
	    generate_audio_button.click(fn=create_podcast_audio, inputs=podcast_script, outputs=podcast_audio, api_name="generate_podcast_audio")

	demo.launch(debug=True, share=True)