Spaces:

phxdev
/

podcaster

Running

App Files Files Community

marks committed on Jan 28

Commit

c405952

1 Parent(s): e94fe41

Fixes

Browse files

Files changed (4) hide show

interface.py +127 -15
models.py +0 -4
podcast_generator.py +9 -0
tts.py +23 -0

interface.py CHANGED Viewed

@@ -2,7 +2,7 @@ import asyncio
 import os
 import time
 from dataclasses import dataclass
-from typing import List, Optional, AsyncGenerator, Tuple
 import gradio as gr
 from dotenv import load_dotenv
 from langchain_openai import ChatOpenAI
@@ -10,6 +10,9 @@ from rich.console import Console
 from rich.panel import Panel
 from rich.text import Text
 from logger import setup_logger, log_execution_time, log_async_execution_time
 from api_clients import OpenRouterClient, ElevenLabsClient
 load_dotenv()
@@ -17,6 +20,116 @@ load_dotenv()
 console = Console()
 logger = setup_logger("interface")
 @log_async_execution_time(logger)
 async def create_podcast(
     url: str,
@@ -25,12 +138,14 @@ async def create_podcast(
     voice_id: str,
     openrouter_key: str,
     model_id: str,
-) -> Tuple[Optional[str], str]:
     """
     Create a podcast through a multi-step process:
     1. Content extraction from URL
     2. Script generation using AI
     3. Voice synthesis
     """
     logger.info(f"Starting podcast creation for URL: {url}")
     logger.debug(f"Parameters - Voice: {voice_id}, Model: {model_id}")
@@ -44,24 +159,21 @@ async def create_podcast(
         # Phase 1: Content scraping
         logger.info("Phase 1/3: Content scraping")
-        if not url.startswith(('http://', 'https://')):
-            raise ValueError("URL must start with http:// or https://")
-        logger.debug("Initializing LLM and browser agent")
-        llm = ChatOpenAI(model="gpt-4")
-        task = f"Visit this URL: {url} and extract the main content. Summarize it in a clear and concise way."
-        content = await llm.apredict(task)
         logger.debug(f"Scraped content length: {len(content)} chars")
         # Phase 2: Script generation
         logger.info("Phase 2/3: Script generation")
         script = await openrouter.generate_script(content, prompt, model_id)
         logger.debug(f"Generated script length: {len(script)} chars")
         # Phase 3: Audio synthesis
         logger.info("Phase 3/3: Audio generation")
-        audio = await elevenlabs.generate_audio(script, voice_id)
-        logger.debug(f"Generated audio data received")
         # Save output
         audio_path = f"podcast_{int(time.time())}.mp3"
@@ -70,11 +182,11 @@ async def create_podcast(
             f.write(audio)
         logger.info("Podcast creation completed successfully")
-        return audio_path, "Podcast created successfully!"
     except Exception as e:
         logger.error("Podcast creation failed", exc_info=True)
-        return None, f"Error: {str(e)}"
 def create_ui():
     logger.info("Initializing Gradio interface")
@@ -119,7 +231,7 @@ def create_ui():
                 submit_btn = gr.Button('Create Podcast', variant='primary')
             with gr.Column(scale=1):
-                audio_output = gr.Audio(label="Generated Podcast", type="filepath")
                 status = gr.Textbox(label='Status', interactive=False)
         # Event handlers
@@ -164,4 +276,4 @@ def create_ui():
 if __name__ == '__main__':
     demo = create_ui()
-    demo.queue().launch()

 import os
 import time
 from dataclasses import dataclass
+from typing import List, Optional, AsyncGenerator
 import gradio as gr
 from dotenv import load_dotenv
 from langchain_openai import ChatOpenAI
 from rich.panel import Panel
 from rich.text import Text
 from logger import setup_logger, log_execution_time, log_async_execution_time
+from browser_use import Agent, Browser
+from browser_use.browser.browser import BrowserContext
 from api_clients import OpenRouterClient, ElevenLabsClient
 load_dotenv()
 console = Console()
 logger = setup_logger("interface")
@dataclass
class ActionResult:
    """Record describing the result of one agent action.

    The field names match the ``ActionResult(...)`` text that
    ``parse_agent_history`` scans for (it looks for ``extracted_content=``).
    Only ``is_done`` is required; the remaining fields default to "nothing
    extracted, no error, not kept in memory" so callers do not have to spell
    out ``None`` for every unused field.
    """

    is_done: bool
    # Text pulled out by the action, when there is any.
    extracted_content: Optional[str] = None
    # Error message for the action, when there is one.
    error: Optional[str] = None
    # NOTE(review): presumably tells the agent whether to keep this result in
    # its memory -- confirm against the browser agent library.
    include_in_memory: bool = False


@dataclass
class AgentHistoryList:
    """Container pairing the action results of a run with the model outputs."""

    all_results: List[ActionResult]
    # NOTE(review): the schema of each dict is not visible in this file.
    all_model_outputs: List[dict]
def _extract_step_contents(history_str: str) -> list[tuple[int, str]]:
    """Return ``(step_number, content)`` for every non-empty extracted_content."""
    import re  # local import keeps this block self-contained

    # Match a complete quoted string literal (either quote style, honouring
    # backslash escapes) so that commas inside the content are preserved.
    quoted_value = re.compile(
        r"""extracted_content=(['"])((?:\\.|(?!\1).)*)\1""",
        re.DOTALL,
    )

    steps = []
    # The text before the first 'ActionResult(' holds no result, so skip it.
    sections = history_str.split('ActionResult(')[1:]
    for step_number, section in enumerate(sections, 1):
        match = quoted_value.search(section)
        if match is None:
            # Field missing or not a string (e.g. extracted_content=None).
            continue
        content = match.group(2)
        if content:
            steps.append((step_number, content))
    return steps


def parse_agent_history(history_str: str) -> None:
    """Print each step's extracted content from an agent history string.

    The string is split on ``'ActionResult('``; for every section whose
    ``extracted_content=`` value is a non-empty quoted string, a blue panel
    titled ``Step <n>`` is printed to the module-level ``console``. ``n`` is
    the 1-based position of the ``ActionResult`` in the string, so steps
    without content leave gaps in the numbering.

    Args:
        history_str: Text containing ``ActionResult(...)`` entries.
    """
    for step_number, content in _extract_step_contents(history_str):
        header = Text(f'Step {step_number}', style='bold blue')
        panel = Panel(content, title=header, border_style='blue')
        console.print(panel)
        console.print()
async def run_browser_task(
    task: str,
    api_key: str,
    provider: str = 'openai',
    model: str = 'gpt-4-vision',
    headless: bool = True,
) -> str:
    """Run a browser agent on ``task`` using the selected LLM provider.

    The API key is exported through the provider's environment variable
    before the chat model is constructed. Failures while creating or running
    the agent are reported as an ``'Error: ...'`` string instead of being
    raised.

    Args:
        task: Natural-language instruction handed to the agent.
        api_key: Key for the chosen provider; must not be blank.
        provider: ``'openai'`` or ``'anthropic'``; any other value is treated
            as Google.
        model: Model name passed to the provider's chat class.
        headless: Passed to the browser context as its ``headless`` option.

    Returns:
        Whatever ``agent.run()`` returns, or a message string when the key is
        missing or the agent fails.
        NOTE(review): the annotation says ``str`` but the agent result is
        returned unconverted -- confirm what callers expect.
    """
    # Guard against None as well as blank strings; None.strip() would raise.
    if not api_key or not api_key.strip():
        return 'Please provide an API key'

    # NOTE(review): ChatAnthropic / ChatGoogleGenerativeAI must be imported at
    # module level; those imports are not visible in this part of the file.
    if provider == 'openai':
        os.environ['OPENAI_API_KEY'] = api_key
        llm = ChatOpenAI(model=model)
    elif provider == 'anthropic':
        os.environ['ANTHROPIC_API_KEY'] = api_key
        llm = ChatAnthropic(model=model)
    else:  # google
        os.environ['GOOGLE_API_KEY'] = api_key
        llm = ChatGoogleGenerativeAI(model=model)

    try:
        agent = Agent(
            task=task,
            llm=llm,
            # Honour the caller's choice instead of always forcing headless.
            browser=Browser(BrowserContext(headless=headless)),
        )
        result = await agent.run()
        # TODO: The result could be parsed better
        return result
    except Exception as e:
        return f'Error: {str(e)}'
@log_async_execution_time(logger)
async def scrape_content(url: str) -> str:
    """
    Have a browser agent visit ``url`` and return its summary of the page.

    The URL is checked first; after that a GPT-4 chat model and a headless
    browser are handed to an ``Agent`` whose task is to extract and summarize
    the main content.

    Args:
        url: Address to scrape; must begin with ``http://`` or ``https://``.

    Returns:
        The value produced by ``agent.run()``.

    Raises:
        ValueError: If ``url`` does not start with an HTTP(S) scheme.
        Exception: Any error raised while building or running the agent is
            logged and re-raised unchanged.
    """
    logger.info(f"Starting content scrape for URL: {url}")

    # Reject anything that is not an explicit http(s) address up front.
    has_http_scheme = url.startswith('http://') or url.startswith('https://')
    if not has_http_scheme:
        logger.error(f"Invalid URL format: {url}")
        raise ValueError("URL must start with http:// or https://")

    extraction_task = f"Visit this URL: {url} and extract the main content. Summarize it in a clear and concise way."

    try:
        logger.debug("Initializing LLM and browser agent")
        summarizer = ChatOpenAI(model="gpt-4")
        headless_browser = Browser(BrowserContext(headless=True))
        agent = Agent(task=extraction_task, llm=summarizer, browser=headless_browser)

        logger.info("Executing content extraction")
        summary = await agent.run()

        logger.debug(f"Content extraction successful. Length: {len(summary)} chars")
        logger.debug(f"Content preview: {summary[:200]}...")
        return summary
    except Exception:
        # Record the failure with traceback, then let the caller handle it.
        logger.error(f"Content extraction failed for {url}", exc_info=True)
        raise
 @log_async_execution_time(logger)
 async def create_podcast(
     url: str,
     voice_id: str,
     openrouter_key: str,
     model_id: str,
+) -> AsyncGenerator[tuple[Optional[str], str], None]:
     """
     Create a podcast through a multi-step process:
     1. Content extraction from URL
     2. Script generation using AI
     3. Voice synthesis
+    Progress updates are yielded at each step for UI feedback.
     """
     logger.info(f"Starting podcast creation for URL: {url}")
     logger.debug(f"Parameters - Voice: {voice_id}, Model: {model_id}")
         # Phase 1: Content scraping
         logger.info("Phase 1/3: Content scraping")
+        yield None, "Scraping website content..."
+        content = await scrape_content(url)
         logger.debug(f"Scraped content length: {len(content)} chars")
         # Phase 2: Script generation
         logger.info("Phase 2/3: Script generation")
+        yield None, "Generating podcast script..."
         script = await openrouter.generate_script(content, prompt, model_id)
         logger.debug(f"Generated script length: {len(script)} chars")
         # Phase 3: Audio synthesis
         logger.info("Phase 3/3: Audio generation")
+        yield None, "Converting to audio..."
+        audio = elevenlabs.generate_audio(script, voice_id)
+        logger.debug(f"Generated audio size: {len(audio)} bytes")
         # Save output
         audio_path = f"podcast_{int(time.time())}.mp3"
             f.write(audio)
         logger.info("Podcast creation completed successfully")
+        yield audio_path, "Podcast created successfully!"
     except Exception as e:
         logger.error("Podcast creation failed", exc_info=True)
+        yield None, f"Error: {str(e)}"
 def create_ui():
     logger.info("Initializing Gradio interface")
                 submit_btn = gr.Button('Create Podcast', variant='primary')
             with gr.Column(scale=1):
+                audio_output = gr.Audio(label="Generated Podcast")
                 status = gr.Textbox(label='Status', interactive=False)
         # Event handlers
 if __name__ == '__main__':
     demo = create_ui()
+    demo.launch()

models.py CHANGED Viewed

@@ -10,13 +10,9 @@ class OpenRouterRequest(BaseModel):
     messages: List[Message]
 class OpenRouterChoice(BaseModel):
-    index: int = 0
     message: Message
-    finish_reason: Optional[str] = None
 class OpenRouterResponse(BaseModel):
-    id: str
-    model: str
     choices: List[OpenRouterChoice]
 class OpenRouterModel(BaseModel):

     messages: List[Message]
 class OpenRouterChoice(BaseModel):
     message: Message
 class OpenRouterResponse(BaseModel):
     choices: List[OpenRouterChoice]
 class OpenRouterModel(BaseModel):

podcast_generator.py ADDED Viewed

	@@ -0,0 +1,9 @@

class PodcastGenerator:
    """Turn scraped text into a podcast script using a text-generation client."""

    def __init__(self, model_client):
        """Store the client.

        Args:
            model_client: Object exposing ``generate(prompt, max_length=...)``
                that returns a mapping with a ``'text'`` entry.
        """
        self.model_client = model_client

    def generate_podcast(self, scraped_content, max_length=300):
        """Ask the model client for a podcast episode based on the content.

        Args:
            scraped_content: Source material interpolated into the prompt.
            max_length: Length limit forwarded to the client. The default of
                300 keeps the previous behaviour (assumed by the original
                author to be roughly three minutes of speech).

        Returns:
            The generated text with surrounding whitespace removed, or an
            empty string when the response has no usable ``'text'`` value.
        """
        prompt = f"Create a podcast episode based on the following content: {scraped_content}"
        response = self.model_client.generate(prompt, max_length=max_length)
        # `or ''` also covers an explicit None, which would break .strip().
        podcast_text = response.get('text') or ''
        return podcast_text.strip()

tts.py ADDED Viewed

	@@ -0,0 +1,23 @@

def text_to_speech(
    text,
    api_key,
    voice_id="21m00Tcm4TlvDq8ikWAM",
    output_path="podcast_episode.mp3",
    timeout=120,
):
    """Synthesize ``text`` with the ElevenLabs API and save it as an MP3 file.

    Args:
        text: Script to convert to speech; must not be blank.
        api_key: ElevenLabs API key; must not be blank.
        voice_id: ElevenLabs voice identifier used in the request path.
            NOTE(review): the default is a premade voice id taken from the
            ElevenLabs examples -- confirm it is available to this account.
        output_path: Where the audio file is written.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        ``output_path``, once the audio has been written.

    Raises:
        ValueError: If ``text`` or ``api_key`` is empty.
        RuntimeError: If the API answers with a non-200 status.
    """
    # Validate before touching the network so bad input fails fast.
    if not text or not text.strip():
        raise ValueError("text must be a non-empty string")
    if not api_key or not api_key.strip():
        raise ValueError("api_key must be a non-empty string")

    import requests

    # The voice id is part of the path and the key goes in `xi-api-key`;
    # ElevenLabs does not use a bearer token for this endpoint.
    url = f"https://api.elevenlabs.io/v1/text-to-speech/{voice_id}"
    headers = {
        "xi-api-key": api_key,
        "Content-Type": "application/json",
        "Accept": "audio/mpeg",
    }
    data = {"text": text}

    # requests never times out by default; bound the wait explicitly.
    response = requests.post(url, headers=headers, json=data, timeout=timeout)
    if response.status_code != 200:
        raise RuntimeError(f"Error: {response.status_code}, {response.text}")

    with open(output_path, "wb") as audio_file:
        audio_file.write(response.content)
    return output_path