|
import ast
import asyncio
import os
import re
import time
from dataclasses import dataclass
from typing import List, Optional, AsyncGenerator

import gradio as gr
from dotenv import load_dotenv
from langchain_openai import ChatOpenAI
from rich.console import Console
from rich.panel import Panel
from rich.text import Text
from browser_use import Agent, Browser
from browser_use.browser.browser import BrowserContext

from logger import setup_logger, log_execution_time, log_async_execution_time
from api_clients import OpenRouterClient, ElevenLabsClient
|
|
|
# Load variables from a local .env file (if any) into the process environment.
load_dotenv()

# Shared rich console used for terminal output in this module.
console = Console()

# Module logger, built by the project's ``setup_logger`` helper.
logger = setup_logger("interface")
|
|
|
@dataclass
class ActionResult:
    """Plain record holding the outcome fields of one agent action."""

    # Flag field; presumably marks the final action of a run (confirm with producer).
    is_done: bool
    # Text extracted by the action, or None when there is none.
    extracted_content: Optional[str]
    # Error description, or None when no error was recorded.
    error: Optional[str]
    # Flag field; presumably controls whether the result is kept in agent memory (confirm).
    include_in_memory: bool
|
|
|
|
|
@dataclass
class AgentHistoryList:
    """Plain record grouping the results and model outputs of an agent run."""

    # One ActionResult per recorded action.
    all_results: List[ActionResult]
    # Model outputs as dictionaries; their schema is not defined in this file.
    all_model_outputs: List[dict]
|
|
|
|
|
# Matches a Python string repr (single- or double-quoted, with backslash escapes).
_QUOTED_LITERAL_RE = re.compile(
    r"'(?:[^'\\]|\\.)*'"
    r'|"(?:[^"\\]|\\.)*"',
    re.DOTALL,
)


def _extract_content(section: str) -> str:
    """Return the ``extracted_content`` value of one history section, or ''."""
    marker = 'extracted_content='
    if marker not in section:
        return ''

    tail = section.split(marker, 1)[1]

    # Preferred path: the value is a quoted string repr. Parsing the literal
    # keeps commas inside the text and decodes escapes such as \n and \'.
    quoted = _QUOTED_LITERAL_RE.match(tail)
    if quoted:
        literal = quoted.group(0)
        try:
            return str(ast.literal_eval(literal))
        except (ValueError, SyntaxError):
            # Not a valid literal after all; show the raw text between quotes.
            return literal[1:-1]

    # Fallback for unquoted values: take everything up to the next comma.
    raw = tail.split(',', 1)[0].strip()
    if raw.rstrip(')') == 'None':
        # ``extracted_content=None`` means there is nothing to display.
        return ''
    return raw.strip("'")


def parse_agent_history(history_str: str) -> None:
    """Print the extracted content of each step in a stringified agent history.

    The history text is split on ``ActionResult(``; each following section is
    one step, numbered from 1. For every step that carries a non-empty
    ``extracted_content`` value, a blue rich panel titled ``Step <n>`` is
    printed to the module console, followed by a blank line.

    Args:
        history_str: Text representation of an agent history containing
            ``ActionResult(...)`` entries.
    """
    sections = history_str.split('ActionResult(')

    # sections[0] is whatever precedes the first ActionResult; skip it.
    for i, section in enumerate(sections[1:], 1):
        content = _extract_content(section)
        if not content:
            continue

        header = Text(f'Step {i}', style='bold blue')
        panel = Panel(content, title=header, border_style='blue')
        console.print(panel)
        console.print()
|
|
|
|
|
async def run_browser_task(
    task: str,
    api_key: str,
    provider: str = 'openai',
    model: str = 'gpt-4-vision',
    headless: bool = True,
) -> str:
    """Run a browser agent on ``task`` with the chosen LLM provider.

    Args:
        task: Natural-language instruction handed to the agent.
        api_key: API key for the selected provider; exported to the matching
            environment variable before the LLM is built.
        provider: ``'openai'`` or ``'anthropic'``; any other value selects the
            Google branch.
        model: Model name passed to the LLM constructor.
        headless: Whether the browser context is created headless.

    Returns:
        ``'Please provide an API key'`` when the key is missing or blank,
        whatever ``agent.run()`` returns on success, or ``'Error: <message>'``
        when building the LLM/agent or running it raises.
    """
    # Guard against None as well as empty/whitespace-only keys.
    if not api_key or not api_key.strip():
        return 'Please provide an API key'

    # Pasted keys often carry stray whitespace; export the cleaned value.
    key = api_key.strip()

    try:
        # LLM construction lives inside the try so that any failure here is
        # reported in the same 'Error: ...' form as agent failures.
        # NOTE(review): ChatAnthropic and ChatGoogleGenerativeAI are not
        # imported in the visible import block - confirm where they come from.
        if provider == 'openai':
            os.environ['OPENAI_API_KEY'] = key
            llm = ChatOpenAI(model=model)
        elif provider == 'anthropic':
            os.environ['ANTHROPIC_API_KEY'] = key
            llm = ChatAnthropic(model=model)
        else:
            os.environ['GOOGLE_API_KEY'] = key
            llm = ChatGoogleGenerativeAI(model=model)

        agent = Agent(
            task=task,
            llm=llm,
            # Honour the caller's headless choice instead of forcing True.
            browser=Browser(BrowserContext(headless=headless)),
        )
        result = await agent.run()
        return result
    except Exception as e:
        return f'Error: {str(e)}'
|
|
|
|
|
@log_async_execution_time(logger)
async def scrape_content(url: str) -> str:
    """Visit ``url`` with a headless browser agent and return its summary.

    The URL scheme is checked first; then a ``gpt-4`` chat model and a
    headless browser agent are built, and the agent is asked to extract and
    summarize the page's main content.

    Args:
        url: Page to visit; must begin with ``http://`` or ``https://``.

    Returns:
        The value produced by ``agent.run()``. This function treats it as
        text (it logs its length and a 200-character preview).

    Raises:
        ValueError: If ``url`` does not start with ``http://`` or ``https://``.
        Exception: Any error raised while building or running the agent is
            logged with its traceback and re-raised unchanged.
    """
    logger.info(f"Starting content scrape for URL: {url}")

    has_http_scheme = url.startswith(('http://', 'https://'))
    if not has_http_scheme:
        logger.error(f"Invalid URL format: {url}")
        raise ValueError("URL must start with http:// or https://")

    try:
        logger.debug("Initializing LLM and browser agent")
        extraction_task = (
            f"Visit this URL: {url} and extract the main content. "
            "Summarize it in a clear and concise way."
        )
        # Build the LLM before the browser, in the same order as before.
        summarizer_llm = ChatOpenAI(model="gpt-4")
        headless_browser = Browser(BrowserContext(headless=True))
        agent = Agent(
            task=extraction_task,
            llm=summarizer_llm,
            browser=headless_browser,
        )

        logger.info("Executing content extraction")
        summary = await agent.run()

        logger.debug(f"Content extraction successful. Length: {len(summary)} chars")
        logger.debug(f"Content preview: {summary[:200]}...")
        return summary
    except Exception:
        # Record the traceback here, then let the caller decide what to do.
        logger.error(f"Content extraction failed for {url}", exc_info=True)
        raise
|
|
|
# NOTE(review): log_async_execution_time is defined outside this file. If it
# wraps the target by awaiting it, it will not work on an async generator
# such as this one - confirm the decorator supports generators.
@log_async_execution_time(logger)
async def create_podcast(
    url: str,
    prompt: str,
    elevenlabs_key: str,
    voice_id: str,
    openrouter_key: str,
    model_id: str,
) -> AsyncGenerator[tuple[Optional[str], str], None]:
    """Create a podcast audio file from a web page, yielding progress updates.

    Steps:
        1. Scrape and summarize the page at ``url``.
        2. Generate a script from that content and ``prompt`` via OpenRouter.
        3. Synthesize the script to audio via ElevenLabs and save it as
           ``podcast_<unix-timestamp>.mp3`` in the current working directory.

    Args:
        url: Source page to scrape.
        prompt: Topic/instructions for the script; may be empty.
        elevenlabs_key: API key used to build the ElevenLabs client.
        voice_id: Voice passed to ``generate_audio``.
        openrouter_key: API key used to build the OpenRouter client.
        model_id: Model passed to ``generate_script``.

    Yields:
        ``(audio_path, status)`` tuples. ``audio_path`` is ``None`` for
        progress and error updates and the saved file path on success.
        Failures are reported as a final ``(None, "Error: ...")`` item
        rather than raised.
    """
    logger.info(f"Starting podcast creation for URL: {url}")
    logger.debug(f"Parameters - Voice: {voice_id}, Model: {model_id}")
    # prompt may arrive as None from the UI; never let logging crash the run.
    logger.debug(f"Prompt length: {len(prompt or '')} chars")

    # Fail fast on missing inputs so no scraping or script generation is
    # spent on a request that cannot finish.
    required_inputs = {
        'URL': url,
        'ElevenLabs API key': elevenlabs_key,
        'voice': voice_id,
        'OpenRouter API key': openrouter_key,
        'model': model_id,
    }
    missing = [
        label
        for label, value in required_inputs.items()
        if not value or not str(value).strip()
    ]
    if missing:
        message = f"Error: Missing required input(s): {', '.join(missing)}"
        logger.error(message)
        yield None, message
        return

    try:
        logger.debug("Initializing API clients")
        openrouter = OpenRouterClient(openrouter_key)
        elevenlabs = ElevenLabsClient(elevenlabs_key)

        logger.info("Phase 1/3: Content scraping")
        yield None, "Scraping website content..."
        content = await scrape_content(url)
        logger.debug(f"Scraped content length: {len(content)} chars")

        logger.info("Phase 2/3: Script generation")
        yield None, "Generating podcast script..."
        script = await openrouter.generate_script(content, prompt, model_id)
        logger.debug(f"Generated script length: {len(script)} chars")

        logger.info("Phase 3/3: Audio generation")
        yield None, "Converting to audio..."
        # generate_audio is called synchronously; run it in a worker thread
        # so the event loop (and the UI) stays responsive while it works.
        audio = await asyncio.to_thread(elevenlabs.generate_audio, script, voice_id)
        logger.debug(f"Generated audio size: {len(audio)} bytes")

        audio_path = f"podcast_{int(time.time())}.mp3"
        logger.debug(f"Saving audio to: {audio_path}")
        with open(audio_path, "wb") as f:
            f.write(audio)

        logger.info("Podcast creation completed successfully")
        yield audio_path, "Podcast created successfully!"

    except Exception as e:
        # Report the failure to the UI as a status update instead of raising.
        logger.error("Podcast creation failed", exc_info=True)
        yield None, f"Error: {str(e)}"
|
|
|
def create_ui():
    """Build and return the Gradio Blocks interface for the podcast creator.

    Layout: a wide left column with the source URL, topic prompt, two API-key
    columns (ElevenLabs key + voice dropdown, OpenRouter key + model
    dropdown) and the submit button; a narrow right column with the audio
    player and a read-only status box.

    Events: changing either API key reloads the matching dropdown, and the
    submit button runs ``create_podcast`` with the six inputs, writing to the
    audio and status outputs.

    Returns:
        The constructed ``gr.Blocks`` instance (not yet launched).
    """
    logger.info("Initializing Gradio interface")

    # Gradio reads tuple choices as (displayed name, value): the hint text is
    # the label and the empty string is the value sent to handlers.
    default_voices = [("Enter API key to load voices", "")]
    default_models = [("Enter API key to load models", "")]

    with gr.Blocks(title='PodcastCreator', theme=gr.themes.Soft()) as interface:
        with gr.Row():
            with gr.Column(scale=2):
                url_input = gr.Textbox(label='Source URL', placeholder='Enter the URL...')
                prompt = gr.Textbox(label='Podcast Topic', lines=3)

                with gr.Row():
                    with gr.Column():
                        elevenlabs_key = gr.Textbox(
                            label='ElevenLabs API Key',
                            type='password',
                            placeholder='Enter key...',
                        )
                        voice = gr.Dropdown(
                            label='Voice',
                            choices=default_voices,
                            value=None,
                            allow_custom_value=True,
                        )

                    with gr.Column():
                        openrouter_key = gr.Textbox(
                            label='OpenRouter API Key',
                            type='password',
                            placeholder='Enter key...',
                        )
                        model = gr.Dropdown(
                            label='AI Model',
                            choices=default_models,
                            value=None,
                            allow_custom_value=True,
                        )

                submit_btn = gr.Button('Create Podcast', variant='primary')

            with gr.Column(scale=1):
                audio_output = gr.Audio(label="Generated Podcast")
                status = gr.Textbox(label='Status', interactive=False)

        def update_voices(key):
            """Return a voice dropdown populated for the given ElevenLabs key."""
            if not key:
                return gr.Dropdown(choices=default_voices, value=default_voices[0][1])
            try:
                client = ElevenLabsClient(key)
                voices = client.get_voices()
                # NOTE(review): the tuple order returned by get_voices() is not
                # visible here. Gradio expects (label, value); confirm that the
                # list and the [0][0] default below agree with that.
                return gr.Dropdown(choices=voices, value=voices[0][0] if voices else None)
            except Exception as e:
                logger.error(f"Failed to load voices: {e}")
                # Show the error text as the label; the value stays None.
                return gr.Dropdown(choices=[(f"Error: {str(e)}", None)], value=None)

        async def update_models(key):
            """Return a model dropdown populated for the given OpenRouter key."""
            if not key:
                return gr.Dropdown(choices=default_models, value=default_models[0][1])
            try:
                client = OpenRouterClient(key)
                models = await client.get_models()
                # NOTE(review): same tuple-order question as update_voices;
                # confirm get_models() returns (label, value) pairs.
                return gr.Dropdown(choices=models, value=models[0][0] if models else None)
            except Exception as e:
                logger.error(f"Failed to load models: {e}")
                # Show the error text as the label; the value stays None.
                return gr.Dropdown(choices=[(f"Error: {str(e)}", None)], value=None)

        try:
            elevenlabs_key.change(fn=update_voices, inputs=elevenlabs_key, outputs=voice)
            openrouter_key.change(fn=update_models, inputs=openrouter_key, outputs=model)

            submit_btn.click(
                fn=create_podcast,
                inputs=[url_input, prompt, elevenlabs_key, voice, openrouter_key, model],
                outputs=[audio_output, status],
            )
        except Exception as e:
            # Log wiring failures for diagnosis, then let them propagate.
            logger.error(f"Failed to set up event handlers: {e}")
            raise

    logger.info("Gradio interface initialized successfully")
    return interface
|
|
|
# Script entry point: build the interface and start the Gradio server.
if __name__ == '__main__':
    demo = create_ui()
    demo.launch()