import os

from openai import OpenAI
import tiktoken

import nltk
nltk.download('punkt')
nltk.download('punkt_tab')  # newer NLTK releases also need punkt_tab for sent_tokenize
from nltk.tokenize import sent_tokenize

from pypdf import PdfReader

from google.colab import userdata  # Colab secrets helper; provides the OPENAI_API_KEY used below

## Function to read the uploaded PDF
def read_data_from_PDF(input_path):
  input_text = ''
  print('Reading PDF from path', input_path)
  reader = PdfReader(input_path)
  number_of_pages = len(reader.pages)
  print('PDF has been read with', number_of_pages, 'pages')
  for page in reader.pages:
    # extract_text() can return None for pages with no extractable text
    input_text += (page.extract_text() or "") + "\n"
  return input_text


## Function to split the text into sentences
def split_text(input_text):
  split_texts = sent_tokenize(input_text)
  return split_texts


## Function to create chunks while respecting sentence boundaries
def create_chunks(split_sents, max_token_len=50):
  enc = tiktoken.encoding_for_model("gpt-4o-mini")
  current_token_len = 0
  input_chunks = []
  current_chunk = ""
  for sent in split_sents:
    sent_token_len = len(enc.encode(sent))
    # Start a new chunk once adding this sentence would exceed the token budget
    if (current_token_len + sent_token_len) > max_token_len:
      input_chunks.append(current_chunk)
      current_chunk = ""
      current_token_len = 0
    # Join sentences with a space so they do not run together
    current_chunk = (current_chunk + " " + sent) if current_chunk else sent
    current_token_len = current_token_len + sent_token_len
  if current_chunk != "":
    input_chunks.append(current_chunk)
  return input_chunks


## Function to create chunks from the full document text
def create_input_chunks(input_text):
  split_sents = split_text(input_text)
  # ~3,000-token chunks keep each summarization request comfortably within the model's context window
  input_chunks = create_chunks(split_sents, max_token_len=3000)
  return input_chunks
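

## Optional sanity check (not part of the pipeline): inspect how many chunks a PDF produces
## and their approximate token counts. "example.pdf" is only an illustrative placeholder path.
# enc = tiktoken.encoding_for_model("gpt-4o-mini")
# sample_chunks = create_input_chunks(read_data_from_PDF("example.pdf"))
# print(len(sample_chunks), [len(enc.encode(chunk)) for chunk in sample_chunks])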


## Function to create summary points for the given input text
def create_summary_points(input_chunks):
  client = OpenAI(api_key=userdata.get('OPENAI_API_KEY'))
  instructPrompt = """
                  You are provided with a piece of text about a subject. Please identify the key facts and highlights of this text.
                  Please provide the output as a bulleted list, for example:

                  - Highlight 1
                  - Highlight 2
                  - Highlight 3
                  """
  podcastFacts = []
  for text in input_chunks:
    request = instructPrompt + '\n' + text
    chatOutput = client.chat.completions.create(model="gpt-4o-mini",
                                                messages=[{"role": "system", "content": "You are a helpful assistant."},
                                                          {"role": "user", "content": request}
                                                          ]
                                                )
    podcastFacts.append(chatOutput.choices[0].message.content)
  return "\n".join(podcastFacts)


## Two different prompt styles for the podcast conversation
debate_podcast_prompt = """
Could you simulate a podcast conversation in a debate style between two experts, "Sid" and "Darren", discussing the following key points extracted from a research paper?
Some things that you need to keep in mind while creating the conversation:
- In the debate, Sid takes a positive view of the findings and supports the implications represented by these key points. They provide reasoning and analogical examples to back up their interpretations.
- Conversely, Darren adopts a more critical or alternative viewpoint. They question some of the findings by discussing potential drawbacks, limitations, or different outcomes.
- The conversation should see both experts engaging with each key point, presenting their views, challenging each other's interpretations, and discussing the broader implications of their arguments.
- The debate should be balanced, allowing each expert to articulate their perspective comprehensively.
- Conclude the conversation with each expert summarizing their overall position on the topic.
Here are some of the facts from the topic.
"""

casual_podcast_prompt = """
Could you simulate a podcast conversation between "Sid" and "Darren" about the following facts?
Some things I'd like to ask:
  - Use "Sid:" and "Darren:" to indicate who is speaking.
  - Start the dialog with a casual discussion of what each person is drinking right now.
  - Make the dialog as long as possible and make it sound funny.
  - Sid presents the information, while Darren asks intelligent questions that help Sid elaborate on the facts.
Here are some of the facts from the topic.
"""

styles = {'casual':casual_podcast_prompt,
          'debate': debate_podcast_prompt}


## Function to create the podcast script
def create_podcast_script(podcast_points, output_style):
  client = OpenAI(api_key=userdata.get('OPENAI_API_KEY'))
  instructPrompt = styles[output_style]
  request = instructPrompt + '\n' + podcast_points
  chatOutput = client.chat.completions.create(model="gpt-4o-mini",
                                            messages=[{"role": "system", "content": "You are a helpful assistant."},
                                                      {"role": "user", "content": request}
                                                      ]
                                            )
  return chatOutput.choices[0].message.content


## Function to call all the podcast script generation steps
def create_podcast(input_path, output_style):
  input_text = read_data_from_PDF(input_path)
  input_chunks = create_input_chunks(input_text)
  podcastHighlights = create_summary_points(input_chunks)
  podcastScript = create_podcast_script(podcastHighlights, output_style)
  return podcastScript
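

## Example usage (optional): generate a script directly, without the Gradio UI below.
## "paper.pdf" is only an illustrative placeholder path for a locally available PDF.
# podcast_script_text = create_podcast("paper.pdf", output_style="debate")
# print(podcast_script_text)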


## Function to generate speech from input text
def openai_generation(input_text, speaker_voice, model_choice="tts-1"):
  client = OpenAI(api_key=userdata.get('OPENAI_API_KEY'))
  response = client.audio.speech.create(
      model=model_choice,
      voice=speaker_voice,
      input=input_text
  )
  # response.read() returns the raw audio bytes (MP3 by default)
  return response.read()
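

## Example usage (optional): synthesize a single line to verify the API key and voice choice.
## "sample.mp3" is just an illustrative output filename.
# sample_audio = openai_generation("Hello and welcome to the show!", speaker_voice="onyx")
# with open("sample.mp3", "wb") as f:
#   f.write(sample_audio)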


## Function to generate the complete audio podcast from a script
## NOTE: this function assumes there are only two speakers who strictly alternate turns;
## please modify it if your script has multiple speakers or a different structure
def create_podcast_audio(podcastScript, speakerName1="Sid", speakerChoice1='onyx', speakerName2="Darren", speakerChoice2='echo'):
  genPodcast = []
  podcastLines = podcastScript.split('\n\n')
  podcastLineNumber = 0
  for line in podcastLines:
    line = line.strip()
    if not line:
      continue  # skip empty segments; the TTS endpoint rejects empty input
    # Even-numbered turns go to speaker 1, odd-numbered turns to speaker 2
    if podcastLineNumber % 2 == 0:
      speakerChoice = speakerChoice1
      line = line.replace(speakerName1 + ":", '')
    else:
      speakerChoice = speakerChoice2
      line = line.replace(speakerName2 + ":", '')
    genVoice = openai_generation(input_text=line, speaker_voice=speakerChoice, model_choice="tts-1")
    genPodcast.append(genVoice)
    podcastLineNumber += 1
  # Concatenate the per-turn MP3 segments into a single output file
  with open("genPodcast.mp3", "wb") as f:
    for pod in genPodcast:
      f.write(pod)
  return "genPodcast.mp3"

import gradio as gr

def upload_file(file):
    return file.name

with gr.Blocks() as demo:
    file_output = gr.File()
    upload_button = gr.UploadButton("Click to Upload a PDF", file_types=[".pdf"], file_count="single")
    upload_button.upload(upload_file, upload_button, file_output)
    podcast_style = gr.Dropdown(choices=list(styles.keys()), label="podcast_style")
    generate_podcast_button = gr.Button("Generate Podcast Script")
    podcast_script = gr.Textbox(interactive=True, label="podcast_script")

    generate_podcast_button.click(fn=create_podcast, inputs=[file_output, podcast_style], outputs=podcast_script, api_name="generate_podcast_script")

    generate_audio_button = gr.Button("Generate Audio Version")
    podcast_audio = gr.Audio(label="podcast_audio", interactive=False, type="filepath")
    generate_audio_button.click(fn=create_podcast_audio, inputs=[podcast_script], outputs=podcast_audio, api_name="generate_podcast_audio")

demo.launch(debug=True, share=True)