Spaces:
Sleeping
Sleeping
Upload app.py
Browse files
app.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
| 1 |
-
import
|
| 2 |
from pydub import AudioSegment
|
| 3 |
from google import genai
|
| 4 |
from google.genai import types
|
|
@@ -284,7 +284,7 @@ Follow this example structure:
|
|
| 284 |
|
| 285 |
try:
|
| 286 |
if progress:
|
| 287 |
-
progress(0.3, "Generating podcast script...")
|
| 288 |
|
| 289 |
# Add timeout to the API call
|
| 290 |
response = await asyncio.wait_for(
|
|
@@ -306,19 +306,19 @@ Follow this example structure:
|
|
| 306 |
timeout=60 # 60 seconds timeout
|
| 307 |
)
|
| 308 |
except asyncio.TimeoutError:
|
| 309 |
-
raise
|
| 310 |
except Exception as e:
|
| 311 |
if "API key not valid" in str(e):
|
| 312 |
-
raise
|
| 313 |
elif "rate limit" in str(e).lower():
|
| 314 |
-
raise
|
| 315 |
else:
|
| 316 |
-
raise
|
| 317 |
|
| 318 |
print(f"Generated podcast script:\n{response.text}")
|
| 319 |
|
| 320 |
if progress:
|
| 321 |
-
progress(0.4, "Script generated successfully!")
|
| 322 |
|
| 323 |
return json.loads(response.text)
|
| 324 |
|
|
@@ -327,7 +327,7 @@ Follow this example structure:
|
|
| 327 |
# Check file size before reading
|
| 328 |
file_size = os.path.getsize(file_obj.name)
|
| 329 |
if file_size > MAX_FILE_SIZE_BYTES:
|
| 330 |
-
raise
|
| 331 |
|
| 332 |
async with aiofiles.open(file_obj.name, 'rb') as f:
|
| 333 |
return await f.read()
|
|
@@ -356,7 +356,7 @@ Follow this example structure:
|
|
| 356 |
except asyncio.TimeoutError:
|
| 357 |
if os.path.exists(temp_filename):
|
| 358 |
os.remove(temp_filename)
|
| 359 |
-
raise
|
| 360 |
except Exception as e:
|
| 361 |
if os.path.exists(temp_filename):
|
| 362 |
os.remove(temp_filename)
|
|
@@ -364,7 +364,7 @@ Follow this example structure:
|
|
| 364 |
|
| 365 |
async def combine_audio_files(self, audio_files: List[str], progress=None) -> str:
|
| 366 |
if progress:
|
| 367 |
-
progress(0.9, "Combining audio files...")
|
| 368 |
|
| 369 |
combined_audio = AudioSegment.empty()
|
| 370 |
for audio_file in audio_files:
|
|
@@ -375,14 +375,14 @@ Follow this example structure:
|
|
| 375 |
combined_audio.export(output_filename, format="wav")
|
| 376 |
|
| 377 |
if progress:
|
| 378 |
-
progress(1.0, "Podcast generated successfully!")
|
| 379 |
|
| 380 |
return output_filename
|
| 381 |
|
| 382 |
async def generate_podcast(self, input_text: str, language: str, speaker1: str, speaker2: str, api_key: str, file_obj=None, progress=None) -> str:
|
| 383 |
try:
|
| 384 |
if progress:
|
| 385 |
-
progress(0.1, "Starting podcast generation...")
|
| 386 |
|
| 387 |
# Set overall timeout for the entire process
|
| 388 |
return await asyncio.wait_for(
|
|
@@ -390,18 +390,18 @@ Follow this example structure:
|
|
| 390 |
timeout=600 # 10 minutes total timeout
|
| 391 |
)
|
| 392 |
except asyncio.TimeoutError:
|
| 393 |
-
raise
|
| 394 |
except Exception as e:
|
| 395 |
-
raise
|
| 396 |
|
| 397 |
async def _generate_podcast_internal(self, input_text: str, language: str, speaker1: str, speaker2: str, api_key: str, file_obj=None, progress=None) -> str:
|
| 398 |
if progress:
|
| 399 |
-
progress(0.2, "Generating podcast script...")
|
| 400 |
|
| 401 |
podcast_json = await self.generate_script(input_text, language, api_key, file_obj, progress)
|
| 402 |
|
| 403 |
if progress:
|
| 404 |
-
progress(0.5, "Converting text to speech...")
|
| 405 |
|
| 406 |
# Process TTS in batches to prevent overwhelming the system
|
| 407 |
audio_files = []
|
|
@@ -410,7 +410,7 @@ Follow this example structure:
|
|
| 410 |
for i, item in enumerate(podcast_json['podcast']):
|
| 411 |
if progress:
|
| 412 |
current_progress = 0.5 + (0.4 * (i / total_lines))
|
| 413 |
-
progress(current_progress, f"Processing speech {i+1}/{total_lines}...")
|
| 414 |
|
| 415 |
try:
|
| 416 |
audio_file = await self.tts_generate(item['line'], item['speaker'], speaker1, speaker2)
|
|
@@ -420,12 +420,12 @@ Follow this example structure:
|
|
| 420 |
for file in audio_files:
|
| 421 |
if os.path.exists(file):
|
| 422 |
os.remove(file)
|
| 423 |
-
raise
|
| 424 |
|
| 425 |
combined_audio = await self.combine_audio_files(audio_files, progress)
|
| 426 |
return combined_audio
|
| 427 |
-
|
| 428 |
-
async def process_input(input_text: str, input_file, language: str, speaker1: str, speaker2: str, api_key: str = "", progress=
|
| 429 |
start_time = time.time()
|
| 430 |
|
| 431 |
voice_names = {
|
|
@@ -443,12 +443,13 @@ async def process_input(input_text: str, input_file, language: str, speaker1: st
|
|
| 443 |
speaker2 = voice_names[speaker2]
|
| 444 |
|
| 445 |
try:
|
| 446 |
-
progress
|
|
|
|
| 447 |
|
| 448 |
if not api_key:
|
| 449 |
api_key = os.getenv("GENAI_API_KEY")
|
| 450 |
if not api_key:
|
| 451 |
-
raise
|
| 452 |
|
| 453 |
podcast_generator = PodcastGenerator()
|
| 454 |
podcast = await podcast_generator.generate_podcast(input_text, language, speaker1, speaker2, api_key, input_file, progress)
|
|
@@ -461,18 +462,25 @@ async def process_input(input_text: str, input_file, language: str, speaker1: st
|
|
| 461 |
# Ensure we show a user-friendly error
|
| 462 |
error_msg = str(e)
|
| 463 |
if "rate limit" in error_msg.lower():
|
| 464 |
-
raise
|
| 465 |
elif "timeout" in error_msg.lower():
|
| 466 |
-
raise
|
| 467 |
else:
|
| 468 |
-
raise
|
| 469 |
|
| 470 |
-
|
| 471 |
-
|
| 472 |
-
|
| 473 |
-
|
| 474 |
-
|
| 475 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 476 |
"Auto Detect",
|
| 477 |
"Afrikaans", "Albanian", "Amharic", "Arabic", "Armenian", "Azerbaijani",
|
| 478 |
"Bahasa Indonesian", "Bangla", "Basque", "Bengali", "Bosnian", "Bulgarian",
|
|
@@ -487,20 +495,10 @@ iface = gr.Interface(
|
|
| 487 |
"Slovak", "Slovene", "Somali", "Spanish", "Sundanese", "Swahili",
|
| 488 |
"Swedish", "Tamil", "Telugu", "Thai", "Turkish", "Ukrainian", "Urdu",
|
| 489 |
"Uzbek", "Vietnamese", "Welsh", "Zulu"
|
| 490 |
-
]
|
| 491 |
-
|
| 492 |
-
|
| 493 |
-
|
| 494 |
-
"Ava - English (United States)",
|
| 495 |
-
"Brian - English (United States)",
|
| 496 |
-
"Emma - English (United States)",
|
| 497 |
-
"Florian - German (Germany)",
|
| 498 |
-
"Seraphina - German (Germany)",
|
| 499 |
-
"Remy - French (France)",
|
| 500 |
-
"Vivienne - French (France)"
|
| 501 |
-
],
|
| 502 |
-
value="Andrew - English (United States)"),
|
| 503 |
-
gr.Dropdown(label="Speaker 2 Voice", choices=[
|
| 504 |
"Andrew - English (United States)",
|
| 505 |
"Ava - English (United States)",
|
| 506 |
"Brian - English (United States)",
|
|
@@ -509,17 +507,95 @@ iface = gr.Interface(
|
|
| 509 |
"Seraphina - German (Germany)",
|
| 510 |
"Remy - French (France)",
|
| 511 |
"Vivienne - French (France)"
|
| 512 |
-
]
|
| 513 |
-
|
| 514 |
-
|
| 515 |
-
|
| 516 |
-
|
| 517 |
-
|
| 518 |
-
|
| 519 |
-
|
| 520 |
-
|
| 521 |
-
|
| 522 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 523 |
|
| 524 |
if __name__ == "__main__":
|
| 525 |
-
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
from pydub import AudioSegment
|
| 3 |
from google import genai
|
| 4 |
from google.genai import types
|
|
|
|
| 284 |
|
| 285 |
try:
|
| 286 |
if progress:
|
| 287 |
+
progress.progress(0.3, "Generating podcast script...")
|
| 288 |
|
| 289 |
# Add timeout to the API call
|
| 290 |
response = await asyncio.wait_for(
|
|
|
|
| 306 |
timeout=60 # 60 seconds timeout
|
| 307 |
)
|
| 308 |
except asyncio.TimeoutError:
|
| 309 |
+
raise Exception("The script generation request timed out. Please try again later.")
|
| 310 |
except Exception as e:
|
| 311 |
if "API key not valid" in str(e):
|
| 312 |
+
raise Exception("Invalid API key. Please provide a valid Gemini API key.")
|
| 313 |
elif "rate limit" in str(e).lower():
|
| 314 |
+
raise Exception("Rate limit exceeded for the API key. Please try again later or provide your own Gemini API key.")
|
| 315 |
else:
|
| 316 |
+
raise Exception(f"Failed to generate podcast script: {e}")
|
| 317 |
|
| 318 |
print(f"Generated podcast script:\n{response.text}")
|
| 319 |
|
| 320 |
if progress:
|
| 321 |
+
progress.progress(0.4, "Script generated successfully!")
|
| 322 |
|
| 323 |
return json.loads(response.text)
|
| 324 |
|
|
|
|
| 327 |
# Check file size before reading
|
| 328 |
file_size = os.path.getsize(file_obj.name)
|
| 329 |
if file_size > MAX_FILE_SIZE_BYTES:
|
| 330 |
+
raise Exception(f"File size exceeds the {MAX_FILE_SIZE_MB}MB limit. Please upload a smaller file.")
|
| 331 |
|
| 332 |
async with aiofiles.open(file_obj.name, 'rb') as f:
|
| 333 |
return await f.read()
|
|
|
|
| 356 |
except asyncio.TimeoutError:
|
| 357 |
if os.path.exists(temp_filename):
|
| 358 |
os.remove(temp_filename)
|
| 359 |
+
raise Exception("Text-to-speech generation timed out. Please try with a shorter text.")
|
| 360 |
except Exception as e:
|
| 361 |
if os.path.exists(temp_filename):
|
| 362 |
os.remove(temp_filename)
|
|
|
|
| 364 |
|
| 365 |
async def combine_audio_files(self, audio_files: List[str], progress=None) -> str:
|
| 366 |
if progress:
|
| 367 |
+
progress.progress(0.9, "Combining audio files...")
|
| 368 |
|
| 369 |
combined_audio = AudioSegment.empty()
|
| 370 |
for audio_file in audio_files:
|
|
|
|
| 375 |
combined_audio.export(output_filename, format="wav")
|
| 376 |
|
| 377 |
if progress:
|
| 378 |
+
progress.progress(1.0, "Podcast generated successfully!")
|
| 379 |
|
| 380 |
return output_filename
|
| 381 |
|
| 382 |
async def generate_podcast(self, input_text: str, language: str, speaker1: str, speaker2: str, api_key: str, file_obj=None, progress=None) -> str:
|
| 383 |
try:
|
| 384 |
if progress:
|
| 385 |
+
progress.progress(0.1, "Starting podcast generation...")
|
| 386 |
|
| 387 |
# Set overall timeout for the entire process
|
| 388 |
return await asyncio.wait_for(
|
|
|
|
| 390 |
timeout=600 # 10 minutes total timeout
|
| 391 |
)
|
| 392 |
except asyncio.TimeoutError:
|
| 393 |
+
raise Exception("The podcast generation process timed out. Please try with shorter text or try again later.")
|
| 394 |
except Exception as e:
|
| 395 |
+
raise Exception(f"Error generating podcast: {str(e)}")
|
| 396 |
|
| 397 |
async def _generate_podcast_internal(self, input_text: str, language: str, speaker1: str, speaker2: str, api_key: str, file_obj=None, progress=None) -> str:
|
| 398 |
if progress:
|
| 399 |
+
progress.progress(0.2, "Generating podcast script...")
|
| 400 |
|
| 401 |
podcast_json = await self.generate_script(input_text, language, api_key, file_obj, progress)
|
| 402 |
|
| 403 |
if progress:
|
| 404 |
+
progress.progress(0.5, "Converting text to speech...")
|
| 405 |
|
| 406 |
# Process TTS in batches to prevent overwhelming the system
|
| 407 |
audio_files = []
|
|
|
|
| 410 |
for i, item in enumerate(podcast_json['podcast']):
|
| 411 |
if progress:
|
| 412 |
current_progress = 0.5 + (0.4 * (i / total_lines))
|
| 413 |
+
progress.progress(current_progress, f"Processing speech {i+1}/{total_lines}...")
|
| 414 |
|
| 415 |
try:
|
| 416 |
audio_file = await self.tts_generate(item['line'], item['speaker'], speaker1, speaker2)
|
|
|
|
| 420 |
for file in audio_files:
|
| 421 |
if os.path.exists(file):
|
| 422 |
os.remove(file)
|
| 423 |
+
raise Exception(f"Error generating speech for line {i+1}: {str(e)}")
|
| 424 |
|
| 425 |
combined_audio = await self.combine_audio_files(audio_files, progress)
|
| 426 |
return combined_audio
|
| 427 |
+
|
| 428 |
+
async def process_input(input_text: str, input_file, language: str, speaker1: str, speaker2: str, api_key: str = "", progress=None) -> str:
|
| 429 |
start_time = time.time()
|
| 430 |
|
| 431 |
voice_names = {
|
|
|
|
| 443 |
speaker2 = voice_names[speaker2]
|
| 444 |
|
| 445 |
try:
|
| 446 |
+
if progress:
|
| 447 |
+
progress.progress(0.05, "Processing input...")
|
| 448 |
|
| 449 |
if not api_key:
|
| 450 |
api_key = os.getenv("GENAI_API_KEY")
|
| 451 |
if not api_key:
|
| 452 |
+
raise Exception("No API key provided. Please provide a Gemini API key.")
|
| 453 |
|
| 454 |
podcast_generator = PodcastGenerator()
|
| 455 |
podcast = await podcast_generator.generate_podcast(input_text, language, speaker1, speaker2, api_key, input_file, progress)
|
|
|
|
| 462 |
# Ensure we show a user-friendly error
|
| 463 |
error_msg = str(e)
|
| 464 |
if "rate limit" in error_msg.lower():
|
| 465 |
+
raise Exception("Rate limit exceeded. Please try again later or use your own API key.")
|
| 466 |
elif "timeout" in error_msg.lower():
|
| 467 |
+
raise Exception("The request timed out. This could be due to server load or the length of your input. Please try again with shorter text.")
|
| 468 |
else:
|
| 469 |
+
raise Exception(f"Error: {error_msg}")
|
| 470 |
|
| 471 |
+
# Streamlit UI
|
| 472 |
+
def main():
|
| 473 |
+
st.set_page_config(page_title="PodcastGen 🎙️", page_icon="🎙️", layout="wide")
|
| 474 |
+
|
| 475 |
+
st.title("PodcastGen 🎙️")
|
| 476 |
+
st.write("Generate a 2-speaker podcast from text input or documents!")
|
| 477 |
+
|
| 478 |
+
with st.sidebar:
|
| 479 |
+
st.header("Configuration")
|
| 480 |
+
api_key = st.text_input("Your Gemini API Key (Optional)", type="password",
|
| 481 |
+
help="In case you are getting rate limited")
|
| 482 |
+
|
| 483 |
+
language_options = [
|
| 484 |
"Auto Detect",
|
| 485 |
"Afrikaans", "Albanian", "Amharic", "Arabic", "Armenian", "Azerbaijani",
|
| 486 |
"Bahasa Indonesian", "Bangla", "Basque", "Bengali", "Bosnian", "Bulgarian",
|
|
|
|
| 495 |
"Slovak", "Slovene", "Somali", "Spanish", "Sundanese", "Swahili",
|
| 496 |
"Swedish", "Tamil", "Telugu", "Thai", "Turkish", "Ukrainian", "Urdu",
|
| 497 |
"Uzbek", "Vietnamese", "Welsh", "Zulu"
|
| 498 |
+
]
|
| 499 |
+
language = st.selectbox("Language", language_options, index=0)
|
| 500 |
+
|
| 501 |
+
voice_options = [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 502 |
"Andrew - English (United States)",
|
| 503 |
"Ava - English (United States)",
|
| 504 |
"Brian - English (United States)",
|
|
|
|
| 507 |
"Seraphina - German (Germany)",
|
| 508 |
"Remy - French (France)",
|
| 509 |
"Vivienne - French (France)"
|
| 510 |
+
]
|
| 511 |
+
speaker1 = st.selectbox("Speaker 1 Voice", voice_options, index=0)
|
| 512 |
+
speaker2 = st.selectbox("Speaker 2 Voice", voice_options, index=1)
|
| 513 |
+
|
| 514 |
+
col1, col2 = st.columns([2, 1])
|
| 515 |
+
|
| 516 |
+
with col1:
|
| 517 |
+
input_text = st.text_area("Input Text", height=250)
|
| 518 |
+
|
| 519 |
+
with col2:
|
| 520 |
+
uploaded_file = st.file_uploader("Or Upload a PDF or TXT file", type=["pdf", "txt"])
|
| 521 |
+
|
| 522 |
+
if st.button("Generate Podcast"):
|
| 523 |
+
if not input_text and not uploaded_file:
|
| 524 |
+
st.error("Please provide either input text or upload a file.")
|
| 525 |
+
return
|
| 526 |
+
|
| 527 |
+
# Create a progress bar for the async operation
|
| 528 |
+
progress_bar = st.progress(0)
|
| 529 |
+
status_text = st.empty()
|
| 530 |
+
|
| 531 |
+
# Create a progress wrapper for compatibility with the existing code
|
| 532 |
+
class StreamlitProgress:
|
| 533 |
+
def progress(self, value, text=None):
|
| 534 |
+
progress_bar.progress(value)
|
| 535 |
+
if text:
|
| 536 |
+
status_text.text(text)
|
| 537 |
+
|
| 538 |
+
try:
|
| 539 |
+
# Prepare file if uploaded
|
| 540 |
+
file_obj = None
|
| 541 |
+
if uploaded_file:
|
| 542 |
+
# Save the uploaded file to a temporary location
|
| 543 |
+
file_path = f"temp_upload_{uuid.uuid4()}{os.path.splitext(uploaded_file.name)[1]}"
|
| 544 |
+
with open(file_path, "wb") as f:
|
| 545 |
+
f.write(uploaded_file.getbuffer())
|
| 546 |
+
|
| 547 |
+
class FileWrapper:
|
| 548 |
+
def __init__(self, path, name):
|
| 549 |
+
self.name = name
|
| 550 |
+
self.path = path
|
| 551 |
+
|
| 552 |
+
@property
|
| 553 |
+
def name(self):
|
| 554 |
+
return self._name
|
| 555 |
+
|
| 556 |
+
@name.setter
|
| 557 |
+
def name(self, value):
|
| 558 |
+
self._name = value
|
| 559 |
+
|
| 560 |
+
file_obj = FileWrapper(file_path, uploaded_file.name)
|
| 561 |
+
file_obj.name = file_path # Set the path as the name for proper file reading
|
| 562 |
+
|
| 563 |
+
# Run the async function in a new event loop
|
| 564 |
+
progress_wrapper = StreamlitProgress()
|
| 565 |
+
audio_file = asyncio.run(process_input(
|
| 566 |
+
input_text,
|
| 567 |
+
file_obj,
|
| 568 |
+
language,
|
| 569 |
+
speaker1,
|
| 570 |
+
speaker2,
|
| 571 |
+
api_key,
|
| 572 |
+
progress_wrapper
|
| 573 |
+
))
|
| 574 |
+
|
| 575 |
+
# Display the audio
|
| 576 |
+
st.subheader("Generated Podcast")
|
| 577 |
+
st.audio(audio_file, format="audio/wav")
|
| 578 |
+
|
| 579 |
+
# Provide a download button
|
| 580 |
+
with open(audio_file, "rb") as f:
|
| 581 |
+
audio_bytes = f.read()
|
| 582 |
+
|
| 583 |
+
st.download_button(
|
| 584 |
+
label="Download Podcast",
|
| 585 |
+
data=audio_bytes,
|
| 586 |
+
file_name="podcast.wav",
|
| 587 |
+
mime="audio/wav"
|
| 588 |
+
)
|
| 589 |
+
|
| 590 |
+
# Clean up the temporary file
|
| 591 |
+
if file_obj:
|
| 592 |
+
try:
|
| 593 |
+
os.remove(file_path)
|
| 594 |
+
except:
|
| 595 |
+
pass
|
| 596 |
+
|
| 597 |
+
except Exception as e:
|
| 598 |
+
st.error(str(e))
|
| 599 |
|
| 600 |
if __name__ == "__main__":
|
| 601 |
+
main()
|