suprimedev committed on
Commit
489d3a3
·
verified ·
1 Parent(s): e5fd7e2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +116 -342
app.py CHANGED
@@ -1,359 +1,133 @@
1
- import gradio as gr
2
- from pydub import AudioSegment
3
  import json
4
- import uuid
5
- import aiohttp
6
- import asyncio
7
- import os
8
  import time
9
- from typing import List, Dict
10
-
11
- # Constants
12
- MAX_FILE_SIZE_MB = 20
13
- MAX_FILE_SIZE_BYTES = MAX_FILE_SIZE_MB * 1024 * 1024 # Convert MB to bytes
14
-
15
- class PodcastGenerator:
16
- def __init__(self):
17
- self.api_key = "sk-4fb613f56acfccf731e801b904cd89f5"
18
- self.api_url = "https://talkbot.ir/api/v1/chat/completions"
19
- self.tts_url = "https://talkbot.ir/TTS-tkun"
20
-
21
- async def generate_script(self, prompt: str, language: str, file_obj=None, progress=None) -> Dict:
22
- example = """
23
- {
24
- "topic": "AGI",
25
- "podcast": [
26
- {
27
- "speaker": 2,
28
- "line": "So, AGI, huh? Seems like everyone's talking about it these days."
29
- },
30
- {
31
- "speaker": 1,
32
- "line": "Yeah, it's definitely having a moment, isn't it?"
33
- }
34
- ]
35
- }
36
- """
37
-
38
- if language == "Auto Detect":
39
- language_instruction = "- The podcast MUST be in the same language as the user input."
40
- else:
41
- language_instruction = f"- The podcast MUST be in {language} language"
42
-
43
- system_prompt = f"""
44
- You are a professional podcast generator. Your task is to generate a professional podcast script based on the user input.
45
- {language_instruction}
46
- - The podcast should have 2 speakers.
47
- - The podcast should be long.
48
- - Do not use names for the speakers.
49
- - The podcast should be interesting, lively, and engaging, and hook the listener from the start.
50
- - The input text might be disorganized or unformatted, originating from sources like PDFs or text files. Ignore any formatting inconsistencies or irrelevant details; your task is to distill the essential points, identify key definitions, and highlight intriguing facts that would be suitable for discussion in a podcast.
51
- - The script must be in JSON format.
52
- Follow this example structure:
53
- {example}
54
- """
55
-
56
- user_prompt = ""
57
- if prompt and file_obj:
58
- user_prompt = f"Please generate a podcast script based on the uploaded file following user input:\n{prompt}"
59
- elif prompt:
60
- user_prompt = f"Please generate a podcast script based on the following user input:\n{prompt}"
61
- else:
62
- user_prompt = "Please generate a podcast script based on the uploaded file."
63
-
64
- # If file is provided, read its content
65
- file_content = ""
66
- if file_obj:
67
- try:
68
- file_bytes = await self._read_file_bytes(file_obj)
69
- file_content = file_bytes.decode('utf-8', errors='ignore')
70
- user_prompt = f"{user_prompt}\n\nFile content:\n{file_content}"
71
- except Exception as e:
72
- raise Exception(f"Failed to read file: {str(e)}")
73
-
74
- messages = [
75
- {"role": "system", "content": system_prompt},
76
- {"role": "user", "content": user_prompt}
77
- ]
78
-
79
- headers = {
80
- "Authorization": f"Bearer {self.api_key}",
81
- "Content-Type": "application/json"
82
- }
83
-
84
- payload = {
85
- "model": "deepseek-v3-0324",
86
- "messages": messages,
87
- "temperature": 1,
88
- "response_format": { "type": "json_object" }
89
- }
90
-
91
- try:
92
- if progress:
93
- progress(0.3, "Generating podcast script...")
94
-
95
- async with aiohttp.ClientSession() as session:
96
- async with session.post(
97
- self.api_url,
98
- headers=headers,
99
- json=payload,
100
- timeout=60
101
- ) as response:
102
-
103
- if response.status != 200:
104
- error_msg = await response.text()
105
- raise Exception(f"API request failed: {error_msg}")
106
-
107
- data = await response.json()
108
- response_text = data.get('choices', [{}])[0].get('message', {}).get('content', '')
109
-
110
- if not response_text:
111
- raise Exception("Empty response from API")
112
-
113
- if progress:
114
- progress(0.4, "Script generated successfully!")
115
-
116
- return json.loads(response_text)
117
-
118
- except asyncio.TimeoutError:
119
- raise Exception("The script generation request timed out. Please try again later.")
120
- except json.JSONDecodeError:
121
- raise Exception("Invalid JSON response from API")
122
- except Exception as e:
123
- if "rate limit" in str(e).lower():
124
- raise Exception("Rate limit exceeded. Please try again later.")
125
- else:
126
- raise Exception(f"Failed to generate podcast script: {e}")
127
 
128
- async def _read_file_bytes(self, file_obj) -> bytes:
129
- """Read file bytes from a file object"""
130
- # Check file size before reading
131
- if hasattr(file_obj, 'size'):
132
- file_size = file_obj.size
133
- else:
134
- file_size = os.path.getsize(file_obj.name)
135
-
136
- if file_size > MAX_FILE_SIZE_BYTES:
137
- raise Exception(f"File size exceeds the {MAX_FILE_SIZE_MB}MB limit. Please upload a smaller file.")
138
-
139
- if hasattr(file_obj, 'read'):
140
- return file_obj.read()
141
- else:
142
- async with aiofiles.open(file_obj.name, 'rb') as f:
143
- return await f.read()
144
 
145
- async def tts_generate(self, text: str) -> str:
146
- headers = {
147
- 'accept': 'application/json',
148
- }
149
-
150
- params = {
151
- 'text': text,
152
- }
153
-
154
- temp_filename = f"temp_{uuid.uuid4()}.wav"
155
-
156
- try:
157
- async with aiohttp.ClientSession() as session:
158
- async with session.get(
159
- self.tts_url,
160
- params=params,
161
- headers=headers,
162
- timeout=30
163
- ) as response:
164
-
165
- if response.status != 200:
166
- error_msg = await response.text()
167
- raise Exception(f"TTS API error: {error_msg}")
168
-
169
- # Save the audio file
170
- async with aiofiles.open(temp_filename, 'wb') as f:
171
- await f.write(await response.read())
172
-
173
- return temp_filename
174
-
175
- except asyncio.TimeoutError:
176
- if os.path.exists(temp_filename):
177
- os.remove(temp_filename)
178
- raise Exception("Text-to-speech generation timed out.")
179
- except Exception as e:
180
- if os.path.exists(temp_filename):
181
- os.remove(temp_filename)
182
- raise e
183
 
184
- async def combine_audio_files(self, audio_files: List[str], progress=None) -> str:
185
- if progress:
186
- progress(0.9, "Combining audio files...")
187
-
188
- combined_audio = AudioSegment.empty()
189
- for audio_file in audio_files:
190
- combined_audio += AudioSegment.from_file(audio_file)
191
- os.remove(audio_file) # Clean up temporary files
192
 
193
- output_filename = f"output_{uuid.uuid4()}.wav"
194
- combined_audio.export(output_filename, format="wav")
 
 
 
 
 
195
 
196
- if progress:
197
- progress(1.0, "Podcast generated successfully!")
198
-
199
- return output_filename
200
-
201
- async def generate_podcast(self, input_text: str, language: str, file_obj=None, progress=None) -> str:
202
- try:
203
- if progress:
204
- progress(0.1, "Starting podcast generation...")
205
-
206
- # Set overall timeout for the entire process
207
- return await asyncio.wait_for(
208
- self._generate_podcast_internal(input_text, language, file_obj, progress),
209
- timeout=600 # 10 minutes total timeout
210
- )
211
- except asyncio.TimeoutError:
212
- raise Exception("The podcast generation process timed out. Please try with shorter text or try again later.")
213
- except Exception as e:
214
- raise Exception(f"Error generating podcast: {str(e)}")
215
-
216
- async def _generate_podcast_internal(self, input_text: str, language: str, file_obj=None, progress=None) -> str:
217
- if progress:
218
- progress(0.2, "Generating podcast script...")
219
-
220
- podcast_json = await self.generate_script(input_text, language, file_obj, progress)
221
 
222
- if progress:
223
- progress(0.5, "Converting text to speech...")
224
 
225
- audio_files = []
226
- total_lines = len(podcast_json['podcast'])
227
 
228
- # Process in batches
229
- batch_size = 5 # Conservative batch size
230
- for batch_start in range(0, total_lines, batch_size):
231
- batch_end = min(batch_start + batch_size, total_lines)
232
- batch = podcast_json['podcast'][batch_start:batch_end]
233
-
234
- # Create tasks for concurrent processing
235
- tts_tasks = []
236
- for item in batch:
237
- tts_task = self.tts_generate(item['line'])
238
- tts_tasks.append(tts_task)
239
-
240
- try:
241
- batch_results = await asyncio.gather(*tts_tasks, return_exceptions=True)
242
-
243
- for i, result in enumerate(batch_results):
244
- if isinstance(result, Exception):
245
- # Clean up any files already created
246
- for file in audio_files:
247
- if os.path.exists(file):
248
- os.remove(file)
249
- raise Exception(f"Error generating speech: {str(result)}")
250
- else:
251
- audio_files.append(result)
252
-
253
- # Update progress
254
- if progress:
255
- current_progress = 0.5 + (0.4 * (batch_end / total_lines))
256
- progress(current_progress, f"Processed {batch_end}/{total_lines} speech segments...")
257
-
258
- except Exception as e:
259
- # Clean up any files already created
260
- for file in audio_files:
261
- if os.path.exists(file):
262
- os.remove(file)
263
- raise Exception(f"Error in batch TTS generation: {str(e)}")
264
 
265
- combined_audio = await self.combine_audio_files(audio_files, progress)
266
- return combined_audio
267
-
268
- async def process_input(input_text: str, input_file, language: str, progress=None) -> str:
269
- start_time = time.time()
270
-
271
- try:
272
- if progress:
273
- progress(0.05, "Processing input...")
274
-
275
- podcast_generator = PodcastGenerator()
276
- podcast = await podcast_generator.generate_podcast(input_text, language, input_file, progress)
277
-
278
- end_time = time.time()
279
- print(f"Total podcast generation time: {end_time - start_time:.2f} seconds")
280
- return podcast
281
 
 
 
282
  except Exception as e:
283
- error_msg = str(e)
284
- if "rate limit" in error_msg.lower():
285
- raise Exception("Rate limit exceeded. Please try again later.")
286
- elif "timeout" in error_msg.lower():
287
- raise Exception("The request timed out. Please try again with shorter text.")
288
- else:
289
- raise Exception(f"Error: {error_msg}")
290
-
291
- # Gradio UI
292
- def generate_podcast_gradio(input_text, input_file, language, progress=gr.Progress()):
293
- # Handle the file if uploaded
294
- file_obj = None
295
- if input_file is not None:
296
- file_obj = input_file
297
-
298
- # Use the progress function from Gradio
299
- def progress_callback(value, text):
300
- progress(value, text)
301
-
302
- # Run the async function in the event loop
303
- result = asyncio.run(process_input(
304
- input_text,
305
- file_obj,
306
- language,
307
- progress_callback
308
- ))
309
 
310
- return result
311
-
312
- def main():
313
- # Define language options
314
- language_options = [
315
- "Auto Detect",
316
- "Afrikaans", "Albanian", "Amharic", "Arabic", "Armenian", "Azerbaijani",
317
- "Bahasa Indonesian", "Bangla", "Basque", "Bengali", "Bosnian", "Bulgarian",
318
- "Burmese", "Catalan", "Chinese Cantonese", "Chinese Mandarin",
319
- "Chinese Taiwanese", "Croatian", "Czech", "Danish", "Dutch", "English",
320
- "Estonian", "Filipino", "Finnish", "French", "Galician", "Georgian",
321
- "German", "Greek", "Hebrew", "Hindi", "Hungarian", "Icelandic", "Irish",
322
- "Italian", "Japanese", "Javanese", "Kannada", "Kazakh", "Khmer", "Korean",
323
- "Lao", "Latvian", "Lithuanian", "Macedonian", "Malay", "Malayalam",
324
- "Maltese", "Mongolian", "Nepali", "Norwegian Bokmål", "Pashto", "Persian",
325
- "Polish", "Portuguese", "Romanian", "Russian", "Serbian", "Sinhala",
326
- "Slovak", "Slovene", "Somali", "Spanish", "Sundanese", "Swahili",
327
- "Swedish", "Tamil", "Telugu", "Thai", "Turkish", "Ukrainian", "Urdu",
328
- "Uzbek", "Vietnamese", "Welsh", "Zulu"
329
- ]
330
 
331
- # Create Gradio interface
332
- with gr.Blocks(title="PodcastGen 🎙️") as demo:
333
- gr.Markdown("# PodcastGen 🎙️")
334
- gr.Markdown("Generate a 2-speaker podcast from text input or documents!")
335
-
336
- with gr.Row():
337
- with gr.Column(scale=2):
338
- input_text = gr.Textbox(label="Input Text", lines=10, placeholder="Enter text for podcast generation...")
339
-
340
- with gr.Column(scale=1):
341
- input_file = gr.File(label="Or Upload a PDF or TXT file", file_types=[".pdf", ".txt"])
342
-
343
- language = gr.Dropdown(label="Language", choices=language_options, value="Auto Detect")
344
-
345
- generate_btn = gr.Button("Generate Podcast", variant="primary")
346
-
347
- with gr.Row():
348
- output_audio = gr.Audio(label="Generated Podcast", type="filepath", format="wav")
349
-
350
- generate_btn.click(
351
- fn=generate_podcast_gradio,
352
- inputs=[input_text, input_file, language],
353
- outputs=[output_audio]
354
- )
355
 
356
- demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
357
 
358
- if __name__ == "__main__":
359
- main()
 
1
+ from flask import Flask, render_template, request, jsonify
2
+ import requests
3
  import json
 
 
 
 
4
  import time
5
+ import os
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
 
7
+ app = Flask(__name__)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
9
# API settings. Each value may be overridden by an environment variable of the
# same name; the literals below are the defaults used when it is unset.
TTS_API_URL = os.environ.get("TTS_API_URL", "https://talkbot.ir/TTS-tkun")
AI_API_URL = os.environ.get("AI_API_URL", "https://talkbot.ir/api/v1/chat/completions")
# SECURITY: this fallback key is committed to the repository. Supply AI_API_KEY
# through the environment and rotate the embedded key instead of relying on it.
AI_API_KEY = os.environ.get("AI_API_KEY", "sk-4fb613f56acfccf731e801b904cd89f5")
MODEL_NAME = os.environ.get("MODEL_NAME", "deepseek-v3-0324")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
 
15
@app.route('/')
def home():
    """Serve the landing page by rendering the ``index.html`` template."""
    page = render_template('index.html')
    return page
 
 
 
 
 
18
 
19
@app.route('/generate_podcast', methods=['POST'])
def generate_podcast():
    """Build a two-voice podcast from the submitted form and return it as JSON.

    Form fields read:
        topic:  required podcast subject.
        voice1: voice used for even-indexed parts (default ``'male'``).
        voice2: voice used for odd-indexed parts (default ``'female'``).

    Returns:
        On success, a JSON object with ``title``, ``description``, ``topic``
        and ``parts`` (each part carries ``text``, ``speaker`` and
        ``audio_url``). On failure, ``{'error': ...}`` with status 400 when
        the topic is missing, or status 500 when the script could not be
        generated or an unexpected exception was raised.
    """
    try:
        # Read the podcast topic and voice choices from the form
        topic = request.form.get('topic')
        voice1 = request.form.get('voice1', 'male')
        voice2 = request.form.get('voice2', 'female')

        if not topic:
            return jsonify({'error': 'لطفا موضوع پادکست را وارد کنید'}), 400

        # Generate the podcast script with the AI model
        conversation = generate_conversation(topic)

        # The model's reply is untrusted: require an object holding a list of parts
        if not isinstance(conversation, dict) or not isinstance(conversation.get('parts'), list):
            return jsonify({'error': 'خطا در تولید محتوای پادکست'}), 500

        # Generate audio for each part
        audio_urls = []
        for i, part in enumerate(conversation['parts']):
            # Skip malformed parts instead of failing the whole request
            text = part.get('text') if isinstance(part, dict) else None
            if not isinstance(text, str) or not text.strip():
                continue

            # Speakers alternate by position in the script
            is_first_speaker = i % 2 == 0
            audio_url = generate_tts(text, voice1 if is_first_speaker else voice2)
            if audio_url:
                audio_urls.append({
                    'text': text,
                    'speaker': "گوینده اول" if is_first_speaker else "گوینده دوم",
                    'audio_url': audio_url,
                })

        # Assemble the podcast payload
        podcast_data = {
            'title': conversation.get('title', 'پادکست تولید شده'),
            'description': conversation.get('description', 'پادکست تولید شده با هوش مصنوعی'),
            'topic': topic,
            'parts': audio_urls,
        }

        return jsonify(podcast_data)

    except Exception as e:
        # Top-level boundary for the route: report the failure as a JSON 500
        print(f"Error: {str(e)}")
        return jsonify({'error': str(e)}), 500
61
+
62
def generate_conversation(topic):
    """Ask the chat-completions API for a podcast script about ``topic``.

    Args:
        topic: Subject of the podcast; interpolated into the user prompt.

    Returns:
        The decoded JSON value of the model's reply (the prompt asks for an
        object with ``title``, ``description`` and ``parts``), or ``None``
        when the topic is blank, the request fails or times out, the API
        answers with a non-200 status, or the reply cannot be parsed.
    """
    if not topic or not str(topic).strip():
        return None

    headers = {
        'Content-Type': 'application/json',
        'Authorization': f'Bearer {AI_API_KEY}'
    }

    prompt = f"""
    یک مکالمه پادکستی جذاب درباره '{topic}' تولید کن.
    پادکست باید بین دو نفر با نام‌های 'گوینده اول' و 'گوینده دوم' باشد.
    مکالمه باید حداکثر 6 تکه متن داشته باشد (مجموع حدود 500 کلمه).
    پاسخ را به فرمت JSON زیر برگردان:
    {{
        "title": "عنوان پادکست",
        "description": "توضیح کوتاه درباره پادکست",
        "parts": [
            {{
                "speaker": "گوینده اول یا دوم",
                "text": "متن گفته شده"
            }},
            // ...
        ]
    }}
    """

    data = {
        "model": MODEL_NAME,
        "messages": [
            {"role": "system", "content": "شما یک تولید کننده حرفه‌ای محتوای پادکست هستید."},
            {"role": "user", "content": prompt}
        ],
        "temperature": 0.7
    }

    try:
        # Without a timeout a stalled upstream would block the worker forever
        response = requests.post(AI_API_URL, headers=headers, json=data, timeout=60)
    except requests.RequestException as e:
        print(f"API request failed: {e}")
        return None

    if response.status_code != 200:
        print(f"API Error: {response.status_code} - {response.text}")
        return None

    # Bound before the try so the handler can always report something
    content = None
    try:
        # Extract the model's message and decode the JSON it contains
        content = response.json()['choices'][0]['message']['content']
        return json.loads(_strip_code_fence(content))
    except (ValueError, KeyError, IndexError, TypeError, AttributeError) as e:
        # ValueError covers json.JSONDecodeError; the rest cover an unexpected
        # response shape (missing keys, empty choices, null content)
        print(f"JSON decode error: {e}")
        print(f"Response content: {content if content is not None else response.text}")
        return None


def _strip_code_fence(content):
    """Return ``content`` without a surrounding Markdown code fence, if any."""
    text = content.strip()
    if len(text) >= 6 and text.startswith('```') and text.endswith('```'):
        text = text[3:-3]
        # Drop an optional "json" language tag after the opening fence
        if text[:4].lower() == 'json':
            text = text[4:]
    return text.strip()
112
+
113
def generate_tts(text, voice):
    """Request speech audio for ``text`` from the TTS endpoint.

    Args:
        text: Text to synthesise.
        voice: Voice identifier, sent as the ``voice`` query parameter.

    Returns:
        The stripped response body, or ``None`` when ``text`` is blank, the
        request fails or times out, the endpoint answers with a non-200
        status, or the body is empty.
    """
    # Nothing to synthesise: skip the network round-trip
    if not text or not str(text).strip():
        return None

    params = {
        'text': text,
        'voice': voice
    }

    try:
        # Without a timeout a stalled endpoint would block the request forever
        response = requests.get(TTS_API_URL, params=params, timeout=30)
    except requests.RequestException as e:
        print(f"TTS Exception: {str(e)}")
        return None

    if response.status_code != 200:
        print(f"TTS Error: {response.status_code} - {response.text}")
        return None

    # NOTE(review): the body is assumed to be the audio file's URL, as the
    # original comment stated — confirm against the TTS API's actual response.
    return response.text.strip() or None
131
 
132
if __name__ == '__main__':
    # Debug mode turns on Werkzeug's interactive debugger, which lets anyone
    # who can reach the server execute code, so it is opt-in via FLASK_DEBUG.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').strip().lower() in {'1', 'true', 'yes', 'on'}
    app.run(debug=debug_enabled)