pdf2podcast / app.py
ndhananj's picture
Create app.py
9c65d04 verified
import os
from openai import OpenAI
import tiktoken
import nltk
nltk.download('punkt')
from nltk.tokenize import sent_tokenize
from pypdf import PdfReader, PageRange
## Function to read the uploaded PDF
def read_data_from_PDF(input_path):
    """Extract the text of every page of a PDF into a single string.

    Args:
        input_path: Location of the PDF; handed directly to ``PdfReader``.

    Returns:
        The text of all pages in document order, each page followed by a
        newline character.
    """
    print ('Reading PDF from path', input_path)
    pdf = PdfReader(input_path)
    page_count = len(pdf.pages)
    print ('PDF has been read with ', page_count, ' pages')
    # One pass over the pages; every page contributes its text plus "\n".
    return ''.join(page.extract_text() + "\n" for page in pdf.pages)
## Function to split the text into sentences
def split_text(input_text):
    """Return the sentences of *input_text* as produced by ``sent_tokenize``."""
    return sent_tokenize(input_text)
## Function to create chunks while considering sentences
def create_chunks(split_sents, max_token_len=50, *, token_counter=None):
    """Pack consecutive sentences into chunks that respect a token budget.

    Sentences are never split: a chunk is closed as soon as adding the next
    sentence would push it past ``max_token_len``. A single sentence that is
    itself larger than the budget becomes a chunk of its own.

    Args:
        split_sents: Iterable of sentence strings, in document order.
        max_token_len: Maximum number of tokens allowed per chunk.
        token_counter: Optional callable ``str -> int`` used to measure a
            sentence. Defaults to the tiktoken encoding for "gpt-4o-mini".

    Returns:
        A list of chunk strings; sentences inside a chunk are joined with a
        single space. Empty input yields an empty list.
    """
    if token_counter is None:
        enc = tiktoken.encoding_for_model("gpt-4o-mini")

        def count_tokens(text):
            return len(enc.encode(text))
    else:
        count_tokens = token_counter

    input_chunks = []
    current_sents = []
    current_token_len = 0
    for sent in split_sents:
        if not sent:
            # An empty sentence carries no content and must not create a chunk.
            continue
        sent_token_len = count_tokens(sent)
        # Only flush when something has been collected; otherwise an oversized
        # first sentence would emit an empty chunk ahead of itself.
        if current_sents and (current_token_len + sent_token_len) > max_token_len:
            input_chunks.append(" ".join(current_sents))
            current_sents = []
            current_token_len = 0
        current_sents.append(sent)
        current_token_len += sent_token_len
    if current_sents:
        input_chunks.append(" ".join(current_sents))
    return input_chunks
## Function to create chunks
def create_input_chunks(input_text):
    """Split *input_text* into sentences and group them into chunks.

    The sentences from ``split_text`` are passed to ``create_chunks`` with a
    budget of 3000 tokens per chunk.

    Returns:
        The list of chunk strings produced by ``create_chunks``.
    """
    return create_chunks(split_text(input_text), max_token_len=3000)
## Function to create summary of the given input text
def create_summary_points(input_chunks):
    """Ask the chat model for bullet-point highlights of every text chunk.

    Each chunk is sent in its own chat-completion request ("gpt-4o-mini"),
    prefixed with an instruction asking for a list of key facts.

    Args:
        input_chunks: Sequence of text chunks to summarise.

    Returns:
        The model replies for all chunks, in order, joined with newlines.
        An empty string when there are no chunks.
    """
    # Nothing to summarise: skip building a client (and needing an API key).
    if not input_chunks:
        return ""
    # The key is read from the environment; `userdata` is a Colab-only helper
    # that is not defined anywhere in this module.
    client = OpenAI(api_key=os.environ.get('OPENAI_API_KEY'))
    instructPrompt = """
You are provided with a piece of text regarding a subject. Please identify the key facts and highlights about this piece of text.
Please provide the output in the form of a list something like -
- Highlight 1
- Highlight 2
- Highlight 3
"""
    podcastFacts = []
    for text in input_chunks:
        request = instructPrompt + '\n' + text
        chatOutput = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": request},
            ],
        )
        podcastFacts.append(chatOutput.choices[0].message.content)
    return "\n".join(podcastFacts)
## Two different prompt styles for the podcast conversation
# Debate format: "Sid" argues in favour of the findings, "Darren" challenges them.
debate_podcast_prompt = """
Could you simulate a podcast conversation in a debate-style between two experts, \"Sid\" and \"Darren\", discussing the following key points extracted from a research paper?
Some things that you need to keep in mind while creating the conversation:
- In the debate, Sid takes a stance that has a positive view of the findings and supports the implications and findings represented by these key points. They provide their reasoning and analogical examples to back up their interpretations.
- Conversely, Darren adopts a more critical or alternative viewpoint. They question some of the findings by discussing potential drawbacks, limitations, or different outcomes.
- The conversation should see both experts engaging with each key point, presenting their views, challenging each other's interpretations, and discussing the broader implications of their arguments.
- The debate should be balanced, allowing each expert to articulate their perspective comprehensively.
- Conclude the conversation with each expert summarizing their overall position on the topic.
Here's some of the facts from the topic.
"""

# Casual format: "Sid" presents the facts, "Darren" asks follow-up questions.
casual_podcast_prompt = """
Could you simulate a podcast conversation between \"Sid\" and \"Darren\" having a conversation about the following facts?
Some things I'd like to ask:
- Use \"Sid:\" and \"Darren:\" to indicate who is speaking.
- Start the dialog with a casual discussion on what each person is drinking right now.
- Make the dialog about this as long as possible and make it sound funny
- Sid is the one presenting the information, Darren is asking intelligent questions that help Sid elaborate the facts.
Here's some of the facts from the topic.
"""

# Maps the style name offered in the UI dropdown to its prompt text.
styles = {
    'casual': casual_podcast_prompt,
    'debate': debate_podcast_prompt,
}
## Function to create the podcast script
def create_podcast_script(podcast_points, output_style):
    """Turn summary highlights into a two-speaker podcast script.

    Args:
        podcast_points: Text with the highlights the conversation should
            cover; appended to the style prompt.
        output_style: Key into the module-level ``styles`` mapping.

    Returns:
        The content of the model's reply ("gpt-4o-mini").

    Raises:
        KeyError: If ``output_style`` is not a key of ``styles``.
    """
    # Resolve the prompt first so an unknown style fails before a client is built.
    instructPrompt = styles[output_style]
    # The key is read from the environment; `userdata` is a Colab-only helper
    # that is not defined anywhere in this module.
    client = OpenAI(api_key=os.environ.get('OPENAI_API_KEY'))
    request = instructPrompt + '\n' + podcast_points
    chatOutput = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": request},
        ],
    )
    return chatOutput.choices[0].message.content
## Function to call all the podcast script generation steps
def create_podcast(input_path, output_style):
    """Run the whole PDF-to-script pipeline.

    Reads the PDF, chunks its text, summarises the chunks and finally asks
    for a podcast script in the requested style.

    Args:
        input_path: Location of the PDF, forwarded to ``read_data_from_PDF``.
        output_style: Style name forwarded to ``create_podcast_script``.

    Returns:
        The script returned by ``create_podcast_script``.
    """
    chunks = create_input_chunks(read_data_from_PDF(input_path))
    highlights = create_summary_points(chunks)
    return create_podcast_script(highlights, output_style)
## Function to generate speech from input text
def openai_generation(input_text, speaker_voice, model_choice="tts-1"):
    """Synthesise speech for *input_text* through the OpenAI speech endpoint.

    Args:
        input_text: Text to be spoken.
        speaker_voice: Voice name passed as ``voice`` to the API.
        model_choice: Speech model name; defaults to "tts-1".

    Returns:
        Whatever ``response.read()`` yields for the speech response
        (the callers in this file write it to a file opened in binary mode).
    """
    # The key is read from the environment; `userdata` is a Colab-only helper
    # that is not defined anywhere in this module.
    client = OpenAI(api_key=os.environ.get('OPENAI_API_KEY'))
    response = client.audio.speech.create(
        model=model_choice,
        voice=speaker_voice,
        input=input_text,
    )
    return response.read()
## Function to generate complete audio podcast from script
## NOTE: this function assumes that there are only two speakers; please modify if you have multiple speakers in the script
def create_podcast_audio(podcastScript, speakerName1="Sid", speakerChoice1='onyx', speakerName2="Darren", speakerChoice2='echo'):
    """Render a two-speaker script to a single audio file.

    The script is split on blank lines. For each segment the voice is chosen
    from the speaker label it contains ("<name>:"); a segment that carries no
    label (or both labels) simply takes the turn after the previous spoken
    segment, starting with the first speaker. Blank segments are skipped
    instead of being sent for synthesis.

    Args:
        podcastScript: Script text with turns separated by blank lines.
        speakerName1: Name of the first speaker as it appears in the script.
        speakerChoice1: Voice used for the first speaker.
        speakerName2: Name of the second speaker as it appears in the script.
        speakerChoice2: Voice used for the second speaker.

    Returns:
        The path of the written file, "genPodcast.mp3".

    Raises:
        ValueError: If the script contains no text to synthesise.
    """
    label1 = speakerName1 + ":"
    label2 = speakerName2 + ":"
    genPodcast = []
    first_speaker_turn = True
    for segment in podcastScript.split('\n\n'):
        if not segment.strip():
            continue
        has_label1 = label1 in segment
        has_label2 = label2 in segment
        # An unambiguous label overrides plain alternation, so a title or
        # stage direction between turns no longer swaps the two voices.
        if has_label1 != has_label2:
            first_speaker_turn = has_label1
        if first_speaker_turn:
            speakerChoice, label = speakerChoice1, label1
        else:
            speakerChoice, label = speakerChoice2, label2
        spoken = segment.replace(label, '').strip()
        if not spoken:
            # A label with no text: keep the turn for the following segment.
            continue
        genVoice = openai_generation(input_text=spoken, speaker_voice=speakerChoice, model_choice="tts-1")
        genPodcast.append(genVoice)
        first_speaker_turn = not first_speaker_turn
    if not genPodcast:
        raise ValueError("podcastScript contains no text to synthesise")
    # The per-segment audio payloads are written back to back into one file.
    with open("genPodcast.mp3", "wb") as f:
        f.writelines(genPodcast)
    return "genPodcast.mp3"
import gradio as gr
def upload_file(file):
    """Return the ``name`` attribute of the uploaded *file* object."""
    uploaded_name = file.name
    return uploaded_name
# Gradio UI: upload a PDF, pick a style, generate the script, then the audio.
with gr.Blocks() as demo:
    # Holds the uploaded PDF; also the input of the script-generation step.
    file_output = gr.File()
    upload_button = gr.UploadButton(
        "Click to Upload a PDF",
        file_types=[".pdf"],
        file_count="single",
    )
    upload_button.upload(upload_file, upload_button, file_output)

    # Style choices come from the keys of the module-level `styles` mapping.
    podcast_style = gr.Dropdown(styles.keys(), label="podcast_style")

    generate_podcast_button = gr.Button("Generate Podcast Script")
    # Editable, so the script can be adjusted before audio is generated.
    podcast_script = gr.Textbox(interactive=True, label="podcast_script")
    generate_podcast_button.click(
        fn=create_podcast,
        inputs=[file_output, podcast_style],
        outputs=podcast_script,
        api_name="generate_podcast_script",
    )

    generate_audio_button = gr.Button("Generate Audio Version")
    podcast_audio = gr.Audio(label="podcast_audio", interactive=False, type="filepath")
    generate_audio_button.click(
        fn=create_podcast_audio,
        inputs=podcast_script,
        outputs=podcast_audio,
        api_name="generate_podcast_audio",
    )

demo.launch(debug=True, share=True)