podcastgen

Runtime error

App Files Files Community

suprimedev committed on Jun 4

Commit

489d3a3

verified ·

1 Parent(s): e5fd7e2

Update app.py

Browse files

Files changed (1) hide show

app.py +116 -342

app.py CHANGED Viewed

@@ -1,359 +1,133 @@
-import gradio as gr
-from pydub import AudioSegment
 import json
-import uuid
-import aiohttp
-import asyncio
-import os
 import time
-from typing import List, Dict
-# Constants
-MAX_FILE_SIZE_MB = 20
-MAX_FILE_SIZE_BYTES = MAX_FILE_SIZE_MB * 1024 * 1024  # Convert MB to bytes
-class PodcastGenerator:
-    def __init__(self):
-        self.api_key = "sk-4fb613f56acfccf731e801b904cd89f5"
-        self.api_url = "https://talkbot.ir/api/v1/chat/completions"
-        self.tts_url = "https://talkbot.ir/TTS-tkun"
-    async def generate_script(self, prompt: str, language: str, file_obj=None, progress=None) -> Dict:
-        example = """
-{
-    "topic": "AGI",
-    "podcast": [
-        {
-            "speaker": 2,
-            "line": "So, AGI, huh? Seems like everyone's talking about it these days."
-        },
-        {
-            "speaker": 1,
-            "line": "Yeah, it's definitely having a moment, isn't it?"
-        }
-    ]
-}
-        """
-        if language == "Auto Detect":
-            language_instruction = "- The podcast MUST be in the same language as the user input."
-        else:
-            language_instruction = f"- The podcast MUST be in {language} language"
-        system_prompt = f"""
-You are a professional podcast generator. Your task is to generate a professional podcast script based on the user input.
-{language_instruction}
-- The podcast should have 2 speakers.
-- The podcast should be long.
-- Do not use names for the speakers.
-- The podcast should be interesting, lively, and engaging, and hook the listener from the start.
-- The input text might be disorganized or unformatted, originating from sources like PDFs or text files. Ignore any formatting inconsistencies or irrelevant details; your task is to distill the essential points, identify key definitions, and highlight intriguing facts that would be suitable for discussion in a podcast.
-- The script must be in JSON format.
-Follow this example structure:
-{example}
-"""
-        user_prompt = ""
-        if prompt and file_obj:
-            user_prompt = f"Please generate a podcast script based on the uploaded file following user input:\n{prompt}"
-        elif prompt:
-            user_prompt = f"Please generate a podcast script based on the following user input:\n{prompt}"
-        else:
-            user_prompt = "Please generate a podcast script based on the uploaded file."
-        # If file is provided, read its content
-        file_content = ""
-        if file_obj:
-            try:
-                file_bytes = await self._read_file_bytes(file_obj)
-                file_content = file_bytes.decode('utf-8', errors='ignore')
-                user_prompt = f"{user_prompt}\n\nFile content:\n{file_content}"
-            except Exception as e:
-                raise Exception(f"Failed to read file: {str(e)}")
-        messages = [
-            {"role": "system", "content": system_prompt},
-            {"role": "user", "content": user_prompt}
-        ]
-        headers = {
-            "Authorization": f"Bearer {self.api_key}",
-            "Content-Type": "application/json"
-        }
-        payload = {
-            "model": "deepseek-v3-0324",
-            "messages": messages,
-            "temperature": 1,
-            "response_format": { "type": "json_object" }
-        }
-        try:
-            if progress:
-                progress(0.3, "Generating podcast script...")
-            async with aiohttp.ClientSession() as session:
-                async with session.post(
-                    self.api_url,
-                    headers=headers,
-                    json=payload,
-                    timeout=60
-                ) as response:
-                    if response.status != 200:
-                        error_msg = await response.text()
-                        raise Exception(f"API request failed: {error_msg}")
-                    data = await response.json()
-                    response_text = data.get('choices', [{}])[0].get('message', {}).get('content', '')
-                    if not response_text:
-                        raise Exception("Empty response from API")
-                    if progress:
-                        progress(0.4, "Script generated successfully!")
-                    return json.loads(response_text)
-        except asyncio.TimeoutError:
-            raise Exception("The script generation request timed out. Please try again later.")
-        except json.JSONDecodeError:
-            raise Exception("Invalid JSON response from API")
-        except Exception as e:
-            if "rate limit" in str(e).lower():
-                raise Exception("Rate limit exceeded. Please try again later.")
-            else:
-                raise Exception(f"Failed to generate podcast script: {e}")
-    async def _read_file_bytes(self, file_obj) -> bytes:
-        """Read file bytes from a file object"""
-        # Check file size before reading
-        if hasattr(file_obj, 'size'):
-            file_size = file_obj.size
-        else:
-            file_size = os.path.getsize(file_obj.name)
-        if file_size > MAX_FILE_SIZE_BYTES:
-            raise Exception(f"File size exceeds the {MAX_FILE_SIZE_MB}MB limit. Please upload a smaller file.")
-        if hasattr(file_obj, 'read'):
-            return file_obj.read()
-        else:
-            async with aiofiles.open(file_obj.name, 'rb') as f:
-                return await f.read()
-    async def tts_generate(self, text: str) -> str:
-        headers = {
-            'accept': 'application/json',
-        }
-        params = {
-            'text': text,
-        }
-        temp_filename = f"temp_{uuid.uuid4()}.wav"
-        try:
-            async with aiohttp.ClientSession() as session:
-                async with session.get(
-                    self.tts_url,
-                    params=params,
-                    headers=headers,
-                    timeout=30
-                ) as response:
-                    if response.status != 200:
-                        error_msg = await response.text()
-                        raise Exception(f"TTS API error: {error_msg}")
-                    # Save the audio file
-                    async with aiofiles.open(temp_filename, 'wb') as f:
-                        await f.write(await response.read())
-                    return temp_filename
-        except asyncio.TimeoutError:
-            if os.path.exists(temp_filename):
-                os.remove(temp_filename)
-            raise Exception("Text-to-speech generation timed out.")
-        except Exception as e:
-            if os.path.exists(temp_filename):
-                os.remove(temp_filename)
-            raise e
-    async def combine_audio_files(self, audio_files: List[str], progress=None) -> str:
-        if progress:
-            progress(0.9, "Combining audio files...")
-        combined_audio = AudioSegment.empty()
-        for audio_file in audio_files:
-            combined_audio += AudioSegment.from_file(audio_file)
-            os.remove(audio_file)  # Clean up temporary files
-        output_filename = f"output_{uuid.uuid4()}.wav"
-        combined_audio.export(output_filename, format="wav")
-        if progress:
-            progress(1.0, "Podcast generated successfully!")
-        return output_filename
-    async def generate_podcast(self, input_text: str, language: str, file_obj=None, progress=None) -> str:
-        try:
-            if progress:
-                progress(0.1, "Starting podcast generation...")
-            # Set overall timeout for the entire process
-            return await asyncio.wait_for(
-                self._generate_podcast_internal(input_text, language, file_obj, progress),
-                timeout=600  # 10 minutes total timeout
-            )
-        except asyncio.TimeoutError:
-            raise Exception("The podcast generation process timed out. Please try with shorter text or try again later.")
-        except Exception as e:
-            raise Exception(f"Error generating podcast: {str(e)}")
-    async def _generate_podcast_internal(self, input_text: str, language: str, file_obj=None, progress=None) -> str:
-        if progress:
-            progress(0.2, "Generating podcast script...")
-        podcast_json = await self.generate_script(input_text, language, file_obj, progress)
-        if progress:
-            progress(0.5, "Converting text to speech...")
-        audio_files = []
-        total_lines = len(podcast_json['podcast'])
-        # Process in batches
-        batch_size = 5  # Conservative batch size
-        for batch_start in range(0, total_lines, batch_size):
-            batch_end = min(batch_start + batch_size, total_lines)
-            batch = podcast_json['podcast'][batch_start:batch_end]
-            # Create tasks for concurrent processing
-            tts_tasks = []
-            for item in batch:
-                tts_task = self.tts_generate(item['line'])
-                tts_tasks.append(tts_task)
-            try:
-                batch_results = await asyncio.gather(*tts_tasks, return_exceptions=True)
-                for i, result in enumerate(batch_results):
-                    if isinstance(result, Exception):
-                        # Clean up any files already created
-                        for file in audio_files:
-                            if os.path.exists(file):
-                                os.remove(file)
-                        raise Exception(f"Error generating speech: {str(result)}")
-                    else:
-                        audio_files.append(result)
-                # Update progress
-                if progress:
-                    current_progress = 0.5 + (0.4 * (batch_end / total_lines))
-                    progress(current_progress, f"Processed {batch_end}/{total_lines} speech segments...")
-            except Exception as e:
-                # Clean up any files already created
-                for file in audio_files:
-                    if os.path.exists(file):
-                        os.remove(file)
-                raise Exception(f"Error in batch TTS generation: {str(e)}")
-        combined_audio = await self.combine_audio_files(audio_files, progress)
-        return combined_audio
-async def process_input(input_text: str, input_file, language: str, progress=None) -> str:
-    start_time = time.time()
-    try:
-        if progress:
-            progress(0.05, "Processing input...")
-        podcast_generator = PodcastGenerator()
-        podcast = await podcast_generator.generate_podcast(input_text, language, input_file, progress)
-        end_time = time.time()
-        print(f"Total podcast generation time: {end_time - start_time:.2f} seconds")
-        return podcast
     except Exception as e:
-        error_msg = str(e)
-        if "rate limit" in error_msg.lower():
-            raise Exception("Rate limit exceeded. Please try again later.")
-        elif "timeout" in error_msg.lower():
-            raise Exception("The request timed out. Please try again with shorter text.")
-        else:
-            raise Exception(f"Error: {error_msg}")
-# Gradio UI
-def generate_podcast_gradio(input_text, input_file, language, progress=gr.Progress()):
-    # Handle the file if uploaded
-    file_obj = None
-    if input_file is not None:
-        file_obj = input_file
-    # Use the progress function from Gradio
-    def progress_callback(value, text):
-        progress(value, text)
-    # Run the async function in the event loop
-    result = asyncio.run(process_input(
-        input_text,
-        file_obj,
-        language,
-        progress_callback
-    ))
-    return result
-def main():
-    # Define language options
-    language_options = [
-        "Auto Detect",
-        "Afrikaans", "Albanian", "Amharic", "Arabic", "Armenian", "Azerbaijani",
-        "Bahasa Indonesian", "Bangla", "Basque", "Bengali", "Bosnian", "Bulgarian",
-        "Burmese", "Catalan", "Chinese Cantonese", "Chinese Mandarin",
-        "Chinese Taiwanese", "Croatian", "Czech", "Danish", "Dutch", "English",
-        "Estonian", "Filipino", "Finnish", "French", "Galician", "Georgian",
-        "German", "Greek", "Hebrew", "Hindi", "Hungarian", "Icelandic", "Irish",
-        "Italian", "Japanese", "Javanese", "Kannada", "Kazakh", "Khmer", "Korean",
-        "Lao", "Latvian", "Lithuanian", "Macedonian", "Malay", "Malayalam",
-        "Maltese", "Mongolian", "Nepali", "Norwegian Bokmål", "Pashto", "Persian",
-        "Polish", "Portuguese", "Romanian", "Russian", "Serbian", "Sinhala",
-        "Slovak", "Slovene", "Somali", "Spanish", "Sundanese", "Swahili",
-        "Swedish", "Tamil", "Telugu", "Thai", "Turkish", "Ukrainian", "Urdu",
-        "Uzbek", "Vietnamese", "Welsh", "Zulu"
-    ]
-    # Create Gradio interface
-    with gr.Blocks(title="PodcastGen 🎙️") as demo:
-        gr.Markdown("# PodcastGen 🎙️")
-        gr.Markdown("Generate a 2-speaker podcast from text input or documents!")
-        with gr.Row():
-            with gr.Column(scale=2):
-                input_text = gr.Textbox(label="Input Text", lines=10, placeholder="Enter text for podcast generation...")
-            with gr.Column(scale=1):
-                input_file = gr.File(label="Or Upload a PDF or TXT file", file_types=[".pdf", ".txt"])
-        language = gr.Dropdown(label="Language", choices=language_options, value="Auto Detect")
-        generate_btn = gr.Button("Generate Podcast", variant="primary")
-        with gr.Row():
-            output_audio = gr.Audio(label="Generated Podcast", type="filepath", format="wav")
-        generate_btn.click(
-            fn=generate_podcast_gradio,
-            inputs=[input_text, input_file, language],
-            outputs=[output_audio]
-        )
-    demo.launch()
-if __name__ == "__main__":
-    main()

+from flask import Flask, render_template, request, jsonify
+import requests
 import json
 import time
+import os
+app = Flask(__name__)
+# API settings
+TTS_API_URL = "https://talkbot.ir/TTS-tkun"
+AI_API_URL = "https://talkbot.ir/api/v1/chat/completions"
+# SECURITY: a key committed to source control should be treated as leaked and
+# rotated. Prefer supplying it through the AI_API_KEY environment variable; the
+# literal remains only as a fallback so existing deployments keep working.
+AI_API_KEY = os.environ.get("AI_API_KEY", "sk-4fb613f56acfccf731e801b904cd89f5")
+MODEL_NAME = "deepseek-v3-0324"
+@app.route('/')
+def home():
+    """Serve the landing page by rendering the ``index.html`` template."""
+    page = render_template('index.html')
+    return page
+@app.route('/generate_podcast', methods=['POST'])
+def generate_podcast():
+    """Build a two-voice podcast for the submitted topic and return it as JSON.
+
+    Form fields:
+        topic:  podcast subject (required; blank or whitespace-only is rejected).
+        voice1: voice used for even-positioned segments (default ``'male'``).
+        voice2: voice used for odd-positioned segments (default ``'female'``).
+
+    Returns:
+        A JSON object with ``title``, ``description``, ``topic`` and ``parts``
+        (each part carries ``text``, ``speaker`` and ``audio_url``). On failure
+        a JSON ``{'error': ...}`` body is returned with status 400 (no topic)
+        or 500 (script generation failed or an unexpected exception occurred).
+    """
+    try:
+        # Read the podcast topic and the two voices from the submitted form.
+        # Stripping rejects whitespace-only topics as well as missing ones.
+        topic = (request.form.get('topic') or '').strip()
+        voice1 = request.form.get('voice1', 'male')
+        voice2 = request.form.get('voice2', 'female')
+        if not topic:
+            return jsonify({'error': 'لطفا موضوع پادکست را وارد کنید'}), 400
+        # Generate the podcast script with the AI model.
+        conversation = generate_conversation(topic)
+        # The model output is untrusted: require an object holding a list of
+        # parts instead of failing later with an opaque TypeError/KeyError.
+        parts = conversation.get('parts') if isinstance(conversation, dict) else None
+        if not isinstance(parts, list):
+            return jsonify({'error': 'خطا در تولید محتوای پادکست'}), 500
+        # Generate the audio for each segment.
+        audio_urls = []
+        for i, part in enumerate(parts):
+            # A malformed segment (not an object, or without text) is skipped
+            # so that it does not fail the whole request.
+            text = part.get('text') if isinstance(part, dict) else None
+            if not text:
+                continue
+            # Voices and speaker labels alternate by position in the script.
+            is_first_speaker = i % 2 == 0
+            audio_url = generate_tts(text, voice1 if is_first_speaker else voice2)
+            # Segments whose audio could not be produced are left out.
+            if audio_url:
+                audio_urls.append({
+                    'text': text,
+                    'speaker': "گوینده اول" if is_first_speaker else "گوینده دوم",
+                    'audio_url': audio_url
+                })
+        # Assemble the podcast payload.
+        podcast_data = {
+            'title': conversation.get('title', 'پادکست تولید شده'),
+            'description': conversation.get('description', 'پادکست تولید شده با هوش مصنوعی'),
+            'topic': topic,
+            'parts': audio_urls
+        }
+        return jsonify(podcast_data)
     except Exception as e:
+        print(f"Error: {str(e)}")
+        return jsonify({'error': str(e)}), 500
+def generate_conversation(topic):
+    """Ask the chat-completions API for a podcast script about ``topic``.
+
+    Args:
+        topic: subject of the podcast, interpolated into the prompt.
+
+    Returns:
+        The parsed JSON value produced by the model, or ``None`` when the
+        request fails, the API answers with a non-200 status, the response
+        does not have the expected shape, or the model output is not valid
+        JSON.
+    """
+    headers = {
+        'Content-Type': 'application/json',
+        'Authorization': f'Bearer {AI_API_KEY}'
+    }
+    # NOTE(review): the `// ...` placeholder in the example below is not valid
+    # JSON; if the model echoes it, parsing fails and None is returned.
+    prompt = f"""
+    یک مکالمه پادکستی جذاب درباره '{topic}' تولید کن.
+    پادکست باید بین دو نفر با نام‌های 'گوینده اول' و 'گوینده دوم' باشد.
+    مکالمه باید حداکثر 6 تکه متن داشته باشد (مجموع حدود 500 کلمه).
+    پاسخ را به فرمت JSON زیر برگردان:
+    {{
+        "title": "عنوان پادکست",
+        "description": "توضیح کوتاه درباره پادکست",
+        "parts": [
+            {{
+                "speaker": "گوینده اول یا دوم",
+                "text": "متن گفته شده"
+            }},
+            // ...
+        ]
+    }}
+    """
+    data = {
+        "model": MODEL_NAME,
+        "messages": [
+            {"role": "system", "content": "شما یک تولید کننده حرفه‌ای محتوای پادکست هستید."},
+            {"role": "user", "content": prompt}
+        ],
+        "temperature": 0.7
+    }
+    try:
+        # Without a timeout a stalled API would block the worker indefinitely.
+        response = requests.post(AI_API_URL, headers=headers, json=data, timeout=60)
+    except requests.RequestException as e:
+        print(f"API request error: {e}")
+        return None
+    if response.status_code != 200:
+        print(f"API Error: {response.status_code} - {response.text}")
+        return None
+    try:
+        # Extract the model's message text from the response envelope.
+        content = response.json()['choices'][0]['message']['content']
+    except (ValueError, KeyError, IndexError, TypeError) as e:
+        # ValueError covers a body that is not JSON at all; the others cover
+        # an envelope that lacks the expected choices/message/content path.
+        print(f"Unexpected API response: {e}")
+        print(f"Response content: {response.text}")
+        return None
+    if not isinstance(content, str):
+        print(f"Response content: {content}")
+        return None
+    # Remove a surrounding markdown code fence (``` or ```json) if present,
+    # tolerating whitespace around it.
+    content = content.strip()
+    if content.startswith('```'):
+        content = content[3:]
+        if content[:4].lower() == 'json':
+            content = content[4:]
+        if content.endswith('```'):
+            content = content[:-3]
+        content = content.strip()
+    try:
+        return json.loads(content)
+    except json.JSONDecodeError as e:
+        print(f"JSON decode error: {e}")
+        print(f"Response content: {content}")
+        return None
+def generate_tts(text, voice):
+    """Request speech for ``text`` in ``voice`` from the TTS endpoint.
+
+    Returns:
+        The stripped response body on HTTP 200 (the caller uses it as the
+        audio URL), or ``None`` when the request fails or is rejected.
+    """
+    params = {
+        'text': text,
+        'voice': voice
+    }
+    try:
+        # Without a timeout a stalled TTS server would block the worker
+        # indefinitely; a timeout is reported like any other request failure.
+        response = requests.get(TTS_API_URL, params=params, timeout=30)
+    except requests.RequestException as e:
+        print(f"TTS Exception: {str(e)}")
+        return None
+    if response.status_code != 200:
+        print(f"TTS Error: {response.status_code} - {response.text}")
+        return None
+    # This code assumes the response body is the audio file URL itself;
+    # add further processing here if the API returns something else.
+    return response.text.strip()
+if __name__ == '__main__':
+    # Debug mode exposes the interactive Werkzeug debugger, which allows
+    # arbitrary code execution, so it is opt-in (FLASK_DEBUG=1) rather than
+    # always enabled.
+    app.run(debug=os.environ.get('FLASK_DEBUG') == '1')