Create app.py
app.py
ADDED
import os
from openai import OpenAI
import tiktoken

import nltk
nltk.download('punkt')
nltk.download('punkt_tab')  # recent NLTK releases also look up punkt_tab when sent_tokenize runs
from nltk.tokenize import sent_tokenize

from pypdf import PdfReader
## Function to read the uploaded PDF
def read_data_from_PDF(input_path):
    input_text = ''
    print('Reading PDF from path', input_path)
    reader = PdfReader(input_path)
    number_of_pages = len(reader.pages)
    print('PDF has been read with', number_of_pages, 'pages')
    for page in reader.pages:
        input_text += page.extract_text() + "\n"
    return input_text


## Function to split the text into sentences
def split_text(input_text):
    split_texts = sent_tokenize(input_text)
    return split_texts


## Function to create chunks while considering sentences
def create_chunks(split_sents, max_token_len=50):
    # Pack whole sentences greedily into chunks of at most max_token_len tokens
    enc = tiktoken.encoding_for_model("gpt-4o-mini")
    current_token_len = 0
    input_chunks = []
    current_chunk = ""
    for sent in split_sents:
        sent_token_len = len(enc.encode(sent))
        if (current_token_len + sent_token_len) > max_token_len:
            input_chunks.append(current_chunk)
            current_chunk = ""
            current_token_len = 0
        # Join with a space so consecutive sentences don't run together
        current_chunk = (current_chunk + " " + sent) if current_chunk else sent
        current_token_len = current_token_len + sent_token_len
    if current_chunk != "":
        input_chunks.append(current_chunk)
    return input_chunks
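
# A rough illustration of the packing behaviour (hypothetical sentences):
#   create_chunks(["Cats sleep a lot.", "Dogs bark.", "Birds sing."], max_token_len=8)
# keeps appending sentences to the current chunk until the next one would push the
# running token count past max_token_len, then starts a new chunk, so the call above
# would return something like ["Cats sleep a lot. Dogs bark.", "Birds sing."]
# (the exact split depends on the tiktoken token counts).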


## Function to create chunks
def create_input_chunks(input_text):
    split_sents = split_text(input_text)
    input_chunks = create_chunks(split_sents, max_token_len=3000)
    return input_chunks


## Function to create summary of the given input text
def create_summary_points(input_chunks):
    client = OpenAI(api_key=os.environ.get('OPENAI_API_KEY'))
    instructPrompt = """
You are provided with a piece of text regarding a subject. Please identify the key facts and highlights about this piece of text.
Please provide the output in the form of a list something like -

- Highlight 1
- Highlight 2
- Highlight 3
"""
    podcastFacts = []
    for text in input_chunks:
        request = instructPrompt + '\n' + text
        chatOutput = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[{"role": "system", "content": "You are a helpful assistant."},
                      {"role": "user", "content": request}]
        )
        podcastFacts.append(chatOutput.choices[0].message.content)
    return "\n".join(podcastFacts)


## Two different prompt styles for the podcast conversation
debate_podcast_prompt = """
Could you simulate a podcast conversation in a debate style between two experts, "Sid" and "Darren", discussing the following key points extracted from a research paper?
Some things that you need to keep in mind while creating the conversation:
- In the debate, Sid takes a stance that has a positive view of the findings and supports the implications and findings represented by these key points. They provide their reasoning and analogical examples to back up their interpretations.
- Conversely, Darren adopts a more critical or alternative viewpoint. They question some of the findings by discussing potential drawbacks, limitations, or different outcomes.
- The conversation should see both experts engaging with each key point, presenting their views, challenging each other's interpretations, and discussing the broader implications of their arguments.
- The debate should be balanced, allowing each expert to articulate their perspective comprehensively.
- Conclude the conversation with each expert summarizing their overall position on the topic.
Here are some of the facts from the topic.
"""

casual_podcast_prompt = """
Could you simulate a podcast conversation between "Sid" and "Darren" having a conversation about the following facts?
Some things I'd like to ask:
- Use "Sid:" and "Darren:" to indicate who is speaking.
- Start the dialog with a casual discussion on what each person is drinking right now.
- Make the dialog about this as long as possible and make it sound funny.
- Sid is the one presenting the information; Darren asks intelligent questions that help Sid elaborate on the facts.
Here are some of the facts from the topic.
"""

styles = {'casual': casual_podcast_prompt,
          'debate': debate_podcast_prompt}
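
# Additional conversation formats could be registered here as well; for example, a
# hypothetical interview_podcast_prompt defined above would appear in the dropdown
# simply by adding:  styles['interview'] = interview_podcast_prompt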


## Function to create the podcast script
def create_podcast_script(podcast_points, output_style):
    client = OpenAI(api_key=os.environ.get('OPENAI_API_KEY'))
    instructPrompt = styles[output_style]
    request = instructPrompt + '\n' + podcast_points
    chatOutput = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[{"role": "system", "content": "You are a helpful assistant."},
                  {"role": "user", "content": request}]
    )
    return chatOutput.choices[0].message.content


## Function to call all the podcast script generation steps
def create_podcast(input_path, output_style):
    input_text = read_data_from_PDF(input_path)
    input_chunks = create_input_chunks(input_text)
    podcastHighlights = create_summary_points(input_chunks)
    podcastScript = create_podcast_script(podcastHighlights, output_style)
    return podcastScript
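
# The same pipeline can be exercised without the UI, e.g. from a Python shell
# (hypothetical file name; requires OPENAI_API_KEY to be set):
#   script = create_podcast("my_paper.pdf", "debate")
#   print(script)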


## Function to generate speech from input text
def openai_generation(input_text, speaker_voice, model_choice="tts-1"):
    client = OpenAI(api_key=os.environ.get('OPENAI_API_KEY'))
    response = client.audio.speech.create(
        model=model_choice,
        voice=speaker_voice,
        input=input_text
    )
    return response.read()
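
# Minimal sketch of using this helper on its own (assumes OPENAI_API_KEY is set;
# "onyx" is one of the built-in OpenAI TTS voices):
#   audio_bytes = openai_generation("Welcome to the show!", speaker_voice="onyx")
#   with open("sample.mp3", "wb") as f:
#       f.write(audio_bytes)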


## Function to generate complete audio podcast from script
## NOTE: this function assumes that there are only two speakers; please modify if you have multiple speakers in the script
def create_podcast_audio(podcastScript, speakerName1="Sid", speakerChoice1='onyx', speakerName2="Darren", speakerChoice2='echo'):
    genPodcast = []
    podcastLines = podcastScript.split('\n\n')
    podcastLineNumber = 0
    for line in podcastLines:
        # Speakers are assumed to alternate, one paragraph per turn
        if podcastLineNumber % 2 == 0:
            speakerChoice = speakerChoice1
            line = line.replace(speakerName1 + ":", '')
        else:
            speakerChoice = speakerChoice2
            line = line.replace(speakerName2 + ":", '')
        genVoice = openai_generation(input_text=line, speaker_voice=speakerChoice, model_choice="tts-1")
        genPodcast.append(genVoice)
        podcastLineNumber += 1
    with open("genPodcast.mp3", "wb") as f:
        for pod in genPodcast:
            f.write(pod)
    return "genPodcast.mp3"
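
# The raw MP3 segments are concatenated byte-for-byte; most players cope with this,
# but a cleaner merge could decode and re-join the segments with an audio library
# such as pydub before exporting a single file.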

import gradio as gr

def upload_file(file):
    return file.name

with gr.Blocks() as demo:
    file_output = gr.File()
    upload_button = gr.UploadButton("Click to Upload a PDF", file_types=[".pdf"], file_count="single")
    upload_button.upload(upload_file, upload_button, file_output)
    podcast_style = gr.Dropdown(list(styles.keys()), label="podcast_style")
    generate_podcast_button = gr.Button("Generate Podcast Script")
    podcast_script = gr.Textbox(interactive=True, label="podcast_script")

    generate_podcast_button.click(fn=create_podcast, inputs=[file_output, podcast_style], outputs=podcast_script, api_name="generate_podcast_script")

    generate_audio_button = gr.Button("Generate Audio Version")
    podcast_audio = gr.Audio(label="podcast_audio", interactive=False, type="filepath")
    generate_audio_button.click(fn=create_podcast_audio, inputs=podcast_script, outputs=podcast_audio, api_name="generate_podcast_audio")

demo.launch(debug=True, share=True)
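
# The OpenAI calls above read OPENAI_API_KEY from the environment, so the key has to
# be available before the buttons will work; on a Hugging Face Space that typically
# means adding it as a repository secret, and for a local run something like
# `OPENAI_API_KEY=sk-... python app.py` (hypothetical key value).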