import gradio as gr
import random
import os

import torch
from elevenlabs import generate, set_api_key, save
from transformers import AutoTokenizer, AutoModelForCausalLM

# Load model and tokenizer
tokenizer = AutoTokenizer.from_pretrained("HuggingFaceH4/zephyr-7b-alpha")
model = AutoModelForCausalLM.from_pretrained(
    "HuggingFaceH4/zephyr-7b-alpha",
    torch_dtype=torch.float16,  # Use float16 for memory efficiency
    device_map="auto"           # Automatically determine device placement
)

# ElevenLabs API key; audio generation returns an error message if it is unset
api_key = os.getenv("ELEVENLABS_API_KEY")
if api_key:
    set_api_key(api_key)

podcasts_directory = "podcasts"
os.makedirs(podcasts_directory, exist_ok=True)


def progress_callback(progress):
    """Normalize a progress value to a number, falling back to 0 on bad input."""
    if progress:
        if isinstance(progress, int):
            return progress
        else:
            try:
                return float(progress)
            except (ValueError, TypeError):
                return 0
    return 0


def generate_podcast_intro(podcast_topic, structure, perspective, tone, existing_podcast_info):
    """Build the generation prompt from the prompt-engineering template file."""
    with open("prompt_engineered.txt", "r", encoding="utf-8") as file:
        prompt_template = file.read()

    prompt = prompt_template.format(
        podcast_topic=podcast_topic,
        structure=structure,
        perspective=perspective,
        tone=tone,
        existing_podcast_info=existing_podcast_info
    )
    return prompt


# Function to generate content
def generate_content(prompt):
    # Format prompt for the Zephyr model (which follows ChatML format)
    messages = [{"role": "user", "content": prompt}]

    # Convert to model inputs
    encoded_input = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,  # Append the assistant turn so the model starts its reply
        return_tensors="pt"
    ).to(model.device)

    # Generate response
    with torch.no_grad():
        output = model.generate(
            encoded_input,
            max_new_tokens=1500,  # Adjust based on desired output length
            do_sample=True,
            temperature=0.7,      # Adjust for creativity vs. determinism
            top_p=0.95
        )

    # Decode and return only the new tokens (the response)
    response = tokenizer.decode(output[0][encoded_input.shape[1]:], skip_special_tokens=True)
    return response


def generate_podcast_audio(podcast_script, voice, progress=gr.Progress()):
    if not api_key:
        return "Error: ElevenLabs API key not set. Please set the ELEVENLABS_API_KEY environment variable."
    try:
        audio = generate(
            text=podcast_script,
            voice=voice,
            model="eleven_turbo_v2"
        )
        random_id = ''.join(random.choices('abcdefghijklmnopqrstuvwxyz0123456789', k=6))
        filename = os.path.join(podcasts_directory, f"podcast_{random_id}.mp3")
        save(audio, filename)
        return filename
    except Exception as e:
        return f"Error generating audio: {str(e)}"


def create_podcast(podcast_topic, structure, perspective, tone, existing_podcast_info, voice_option, progress=gr.Progress()):
    # gr.Progress expects a fraction between 0 and 1
    progress(0, desc="Generating podcast content...")
    prompt = generate_podcast_intro(podcast_topic, structure, perspective, tone, existing_podcast_info)

    progress(0.2, desc="Processing with AI...")
    podcast_content = generate_content(prompt)

    progress(0.6, desc="Generating audio...")
    audio_file = generate_podcast_audio(podcast_content, voice_option, progress)

    progress(1.0, desc="Complete!")
    return podcast_content, audio_file


available_voices = [
    "Adam", "Antoni", "Arnold", "Bella", "Callum", "Charlie", "Christina", "Clyde",
    "Daniel", "Dorothy", "Ella", "Elli", "Emily", "Fin", "Freya", "Gigi", "Giovanni",
    "Glinda", "Grace", "Harry", "James", "Jeremy", "Joseph", "Josh", "Knightley",
    "Liam", "Matilda", "Matthew", "Michael", "Nicole", "Patrick", "Rachel", "Richard",
    "Sam", "Sarah", "Serena", "Thomas", "Victor", "Wayne", "Charlotte"
]

with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🎙️ AI Podcast Generator")
    gr.Markdown("Generate a complete podcast with AI, including audio narration.")

    with gr.Row():
        with gr.Column():
            podcast_topic = gr.Textbox(
                label="Podcast Topic",
                placeholder="Enter the main topic of your podcast",
                lines=2
            )
            structure = gr.Radio(
                ["Interview Style", "Solo Monologue", "Panel Discussion", "Storytelling", "Educational"],
                label="Podcast Structure",
                value="Interview Style"
            )
            perspective = gr.Radio(
                ["Balanced and Objective", "Personal Opinion", "Expert Analysis", "Conversational", "Investigative"],
                label="Perspective",
                value="Balanced and Objective"
            )
            tone = gr.Radio(
                ["Professional", "Casual & Friendly", "Humorous", "Serious & Formal", "Inspirational"],
                label="Tone",
                value="Professional"
            )
            existing_podcast_info = gr.Textbox(
                label="Additional Context (Optional)",
                placeholder="Any additional information, context, or specific points you want to include",
                lines=3
            )
            voice_option = gr.Dropdown(
                choices=available_voices,
                label="Voice for Audio",
                value="Adam"
            )
            generate_btn = gr.Button("Generate Podcast", variant="primary")

        with gr.Column():
            podcast_output = gr.Textbox(label="Generated Podcast Script", lines=12)
            audio_output = gr.Audio(label="Podcast Audio")

    generate_btn.click(
        create_podcast,
        inputs=[podcast_topic, structure, perspective, tone, existing_podcast_info, voice_option],
        outputs=[podcast_output, audio_output]
    )

if __name__ == "__main__":
    demo.launch()