Spaces:

phxdev
/

podcaster

Running

podcaster / interface.py

marks

More formatting fixes

340bc5b 2 months ago

9.65 kB

	import asyncio
	import os
	import time
	from dataclasses import dataclass
	from typing import List, Optional, AsyncGenerator
	import gradio as gr
	from dotenv import load_dotenv
	from langchain_openai import ChatOpenAI
	from rich.console import Console
	from rich.panel import Panel
	from rich.text import Text
	from logger import setup_logger, log_execution_time, log_async_execution_time

	from browser_use import Agent, Browser
	from browser_use.browser.browser import BrowserContext
	from api_clients import OpenRouterClient, ElevenLabsClient

	# Load settings from a local .env file (if any) before anything reads them.
	load_dotenv()

	# Shared Rich console; parse_agent_history prints its panels through it.
	console = Console()
	# Module-wide logger, also handed to the execution-time decorators below.
	logger = setup_logger("interface")

	@dataclass
	class ActionResult:
	    """Plain record describing the result of one agent action.

	    The generated constructor takes the four fields in declaration order;
	    none of them has a default value.
	    """

	    # Field meanings are inferred from the names -- TODO confirm with the
	    # code that produces these records.
	    is_done: bool  # presumably True once the task has finished
	    extracted_content: Optional[str]  # extracted text, or None
	    error: Optional[str]  # error message, or None
	    include_in_memory: bool  # presumably whether the agent keeps this result


	@dataclass
	class AgentHistoryList:
	    """Container pairing a list of action results with a list of model outputs.

	    Both fields are required and are taken by the generated constructor in
	    declaration order.
	    """

	    # One ActionResult per recorded action.
	    all_results: List[ActionResult]
	    # Model outputs as plain dicts; their schema is not defined in this file.
	    all_model_outputs: List[dict]


	def parse_agent_history(history_str: str) -> None:
	    """Print the extracted content of every step found in a history string.

	    The string is cut at each ``ActionResult(`` occurrence. For every piece
	    that contains ``extracted_content=``, the text following that marker up
	    to the first comma (with surrounding single quotes stripped) is shown in
	    a blue Rich panel titled ``Step <n>``, followed by an empty line. Pieces
	    without the marker, or with an empty value, print nothing.

	    Args:
	        history_str: Text representation of an agent history.
	    """
	    marker = 'extracted_content='

	    # Everything before the first 'ActionResult(' is not a step; drop it.
	    _, *steps = history_str.split('ActionResult(')

	    for step_no, chunk in enumerate(steps, start=1):
	        if marker not in chunk:
	            continue

	        # The value runs from the marker to the first comma after it.
	        after_marker = chunk.split(marker)[1]
	        text = after_marker.split(',')[0].strip("'")
	        if not text:
	            continue

	        title = Text(f'Step {step_no}', style='bold blue')
	        console.print(Panel(text, title=title, border_style='blue'))
	        console.print()


	async def run_browser_task(
	    task: str,
	    api_key: str,
	    provider: str = 'openai',
	    model: str = 'gpt-4-vision',
	    headless: bool = True,
	) -> str:
	    """Run a browser-automation task with the chosen LLM provider.

	    Failures are reported as a returned message rather than raised, so the
	    caller can display the outcome directly.

	    Args:
	        task: Natural-language description of what the agent should do.
	        api_key: API key for the selected provider. It is exported through
	            the provider's environment variable before the LLM is built.
	        provider: One of ``'openai'``, ``'anthropic'`` or ``'google'``.
	        model: Model name passed to the provider's chat class.
	        headless: Whether the browser runs without a visible window.

	    Returns:
	        Whatever ``agent.run()`` produces on success, or a message string:
	        ``'Please provide an API key'`` for a missing or blank key, and
	        ``'Error: ...'`` for an unsupported provider or any exception raised
	        while building or running the agent.
	    """
	    # Also covers None, which would otherwise fail on .strip().
	    if not api_key or not api_key.strip():
	        return 'Please provide an API key'

	    # Reject unknown providers instead of silently treating them as Google.
	    if provider not in ('openai', 'anthropic', 'google'):
	        return f'Error: Unsupported provider: {provider!r}'

	    try:
	        # LLM construction lives inside the try so that its failures are
	        # reported the same way as agent failures.
	        if provider == 'openai':
	            os.environ['OPENAI_API_KEY'] = api_key
	            llm = ChatOpenAI(model=model)
	        elif provider == 'anthropic':
	            os.environ['ANTHROPIC_API_KEY'] = api_key
	            # NOTE(review): ChatAnthropic is not imported in the visible
	            # file; until it is, this branch returns an 'Error: ...' message.
	            llm = ChatAnthropic(model=model)
	        else:  # google
	            os.environ['GOOGLE_API_KEY'] = api_key
	            # NOTE(review): ChatGoogleGenerativeAI is not imported in the
	            # visible file either; same consequence as above.
	            llm = ChatGoogleGenerativeAI(model=model)

	        agent = Agent(
	            task=task,
	            llm=llm,
	            # Honour the caller's choice instead of always forcing headless.
	            browser=Browser(BrowserContext(headless=headless)),
	        )
	        result = await agent.run()
	        # TODO: The result could be parsed better
	        return result
	    except Exception as e:
	        return f'Error: {e}'


	@log_async_execution_time(logger)
	async def scrape_content(url: str) -> str:
	    """
	    Scrape and summarize content from the given URL using browser automation

	    This function performs the following steps:
	    1. Validates the input URL
	    2. Initializes the browser agent
	    3. Extracts and summarizes the content

	    Args:
	        url: Target URL to scrape. Surrounding whitespace is ignored.

	    Returns:
	        The value produced by ``agent.run()`` for the summarization task.

	    Raises:
	        ValueError: If the URL does not start with http:// or https://.
	        Exception: Any error raised while building or running the agent is
	            logged with its traceback and re-raised unchanged.
	    """
	    # Pasted URLs often carry stray whitespace; do not reject them for it.
	    url = url.strip()
	    logger.info(f"Starting content scrape for URL: {url}")

	    # Input validation
	    if not url.startswith(('http://', 'https://')):
	        logger.error(f"Invalid URL format: {url}")
	        raise ValueError("URL must start with http:// or https://")

	    try:
	        logger.debug("Initializing LLM and browser agent")
	        llm = ChatOpenAI(model="gpt-4")
	        agent = Agent(
	            task=f"Visit this URL: {url} and extract the main content. Summarize it in a clear and concise way.",
	            llm=llm,
	            browser=Browser(BrowserContext(headless=True))
	        )

	        logger.info("Executing content extraction")
	        result = await agent.run()

	        # NOTE(review): len() and slicing assume the result behaves like a
	        # string -- confirm against the return type of Agent.run().
	        logger.debug(f"Content extraction successful. Length: {len(result)} chars")
	        logger.debug(f"Content preview: {result[:200]}...")

	        return result
	    except Exception:
	        # Log with traceback, then let the caller decide how to report it.
	        logger.error(f"Content extraction failed for {url}", exc_info=True)
	        raise

	@log_async_execution_time(logger)
	async def create_podcast(
	    url: str,
	    prompt: str,
	    elevenlabs_key: str,
	    voice_id: str,
	    openrouter_key: str,
	    model_id: str,
	) -> AsyncGenerator[tuple[Optional[str], str], None]:
	    """
	    Create a podcast through a multi-step process:
	    1. Content extraction from URL
	    2. Script generation using AI
	    3. Voice synthesis

	    Progress updates are yielded at each step for UI feedback.

	    Args:
	        url: Page to scrape for source material.
	        prompt: Instructions passed to the script generator.
	        elevenlabs_key: API key used to build the ElevenLabs client.
	        voice_id: Voice passed to the audio generator.
	        openrouter_key: API key used to build the OpenRouter client.
	        model_id: Model passed to the script generator.

	    Yields:
	        ``(audio_path, status)`` pairs. ``audio_path`` is ``None`` for
	        progress and error updates, and the path of the written
	        ``podcast_<timestamp>.mp3`` file on the final successful update.
	        Any exception is reported as a ``(None, "Error: ...")`` pair instead
	        of being raised.
	    """
	    # NOTE(review): this is an async generator; confirm that
	    # log_async_execution_time supports async generators and does not simply
	    # await the call.
	    logger.info(f"Starting podcast creation for URL: {url}")
	    logger.debug(f"Parameters - Voice: {voice_id}, Model: {model_id}")
	    logger.debug(f"Prompt length: {len(prompt)} chars")

	    try:
	        # Initialize clients with validation
	        logger.debug("Initializing API clients")
	        openrouter = OpenRouterClient(openrouter_key)
	        elevenlabs = ElevenLabsClient(elevenlabs_key)

	        # Phase 1: Content scraping
	        logger.info("Phase 1/3: Content scraping")
	        yield None, "Scraping website content..."
	        content = await scrape_content(url)
	        logger.debug(f"Scraped content length: {len(content)} chars")

	        # Phase 2: Script generation
	        logger.info("Phase 2/3: Script generation")
	        yield None, "Generating podcast script..."
	        script = await openrouter.generate_script(content, prompt, model_id)
	        logger.debug(f"Generated script length: {len(script)} chars")

	        # Phase 3: Audio synthesis
	        logger.info("Phase 3/3: Audio generation")
	        yield None, "Converting to audio..."
	        # generate_audio is a plain (non-awaitable) call; run it in a worker
	        # thread so the event loop stays responsive while it completes.
	        audio = await asyncio.to_thread(
	            elevenlabs.generate_audio, script, voice_id
	        )
	        logger.debug(f"Generated audio size: {len(audio)} bytes")

	        # Save output
	        audio_path = f"podcast_{int(time.time())}.mp3"
	        logger.debug(f"Saving audio to: {audio_path}")
	        with open(audio_path, "wb") as f:
	            f.write(audio)

	        logger.info("Podcast creation completed successfully")
	        yield audio_path, "Podcast created successfully!"

	    except Exception as e:
	        # Surface the failure in the status box rather than raising.
	        logger.error("Podcast creation failed", exc_info=True)
	        yield None, f"Error: {e}"

	def create_ui() -> gr.Blocks:
	    """Build the Gradio interface for the podcast creator.

	    The layout has a wide left column (source URL, topic prompt, the two
	    API-key/dropdown pairs and the submit button) and a narrow right column
	    (audio player and status box). Changing either API key refreshes the
	    matching dropdown; clicking the button runs ``create_podcast``.

	    Returns:
	        The assembled ``gr.Blocks`` object; it is not launched here.

	    Raises:
	        Exception: Any error raised while wiring the event handlers is
	            logged and re-raised.
	    """
	    logger.info("Initializing Gradio interface")

	    # Default choices for dropdowns
	    # NOTE(review): Gradio documents tuple choices as (name, value); here the
	    # placeholder text sits in the second slot -- confirm the intended order.
	    default_voices = [("", "Enter API key to load voices")]
	    default_models = [("", "Enter API key to load models")]

	    with gr.Blocks(title='PodcastCreator', theme=gr.themes.Soft()) as interface:
	        with gr.Row():
	            # Left column: all inputs plus the submit button.
	            with gr.Column(scale=2):
	                url_input = gr.Textbox(label='Source URL', placeholder='Enter the URL...')
	                prompt = gr.Textbox(label='Podcast Topic', lines=3)

	                with gr.Row():
	                    # ElevenLabs key and the voice list it unlocks.
	                    with gr.Column():
	                        elevenlabs_key = gr.Textbox(
	                            label='ElevenLabs API Key',
	                            type='password',
	                            placeholder='Enter key...'
	                        )
	                        voice = gr.Dropdown(
	                            label='Voice',
	                            choices=default_voices,
	                            value=None,
	                            allow_custom_value=True
	                        )

	                    # OpenRouter key and the model list it unlocks.
	                    with gr.Column():
	                        openrouter_key = gr.Textbox(
	                            label='OpenRouter API Key',
	                            type='password',
	                            placeholder='Enter key...'
	                        )
	                        model = gr.Dropdown(
	                            label='AI Model',
	                            choices=default_models,
	                            value=None,
	                            allow_custom_value=True
	                        )

	                submit_btn = gr.Button('Create Podcast', variant='primary')

	            # Right column: results.
	            with gr.Column(scale=1):
	                audio_output = gr.Audio(label="Generated Podcast")
	                status = gr.Textbox(label='Status', interactive=False)

	        # Event handlers
	        def update_voices(key):
	            # Rebuild the voice dropdown for the given ElevenLabs key.
	            # An empty key restores the placeholder entry.
	            if not key:
	                return gr.Dropdown(choices=default_voices, value=default_voices[0][0])
	            try:
	                client = ElevenLabsClient(key)
	                voices = client.get_voices()
	                # Preselect the first entry when the list is not empty.
	                return gr.Dropdown(choices=voices, value=voices[0][0] if voices else None)
	            except Exception as e:
	                # Show the failure as the only dropdown entry.
	                logger.error(f"Failed to load voices: {e}")
	                return gr.Dropdown(choices=[(None, f"Error: {str(e)}")], value=None)

	        async def update_models(key):
	            # Same as update_voices, but for OpenRouter models; the model
	            # listing call is awaited.
	            if not key:
	                return gr.Dropdown(choices=default_models, value=default_models[0][0])
	            try:
	                client = OpenRouterClient(key)
	                models = await client.get_models()
	                return gr.Dropdown(choices=models, value=models[0][0] if models else None)
	            except Exception as e:
	                logger.error(f"Failed to load models: {e}")
	                return gr.Dropdown(choices=[(None, f"Error: {str(e)}")], value=None)

	        # Add error handling for the event handlers
	        try:
	            # Refresh each dropdown whenever its API key field changes.
	            elevenlabs_key.change(fn=update_voices, inputs=elevenlabs_key, outputs=voice)
	            openrouter_key.change(fn=update_models, inputs=openrouter_key, outputs=model)

	            # Input order matches the create_podcast parameters; each pair
	            # it yields fills the audio player and the status box.
	            submit_btn.click(
	                fn=create_podcast,
	                inputs=[url_input, prompt, elevenlabs_key, voice, openrouter_key, model],
	                outputs=[audio_output, status]
	            )
	        except Exception as e:
	            logger.error(f"Failed to set up event handlers: {e}")
	            raise

	    logger.info("Gradio interface initialized successfully")
	    return interface

	# Script entry point: build the interface and launch it when run directly.
	if __name__ == '__main__':
	    demo = create_ui()
	    demo.launch()