marks committed on
Commit
0a7e33a
·
2 Parent(s): 4b172dd c15b647

Merge branch 'main' of https://huggingface.co/spaces/phxdev/podcaster

Browse files
Files changed (7) hide show
  1. Dockerfile +1 -1
  2. README.md +6 -77
  3. api_clients.py +59 -73
  4. app.py +116 -34
  5. config.py +8 -0
  6. models.py +25 -0
  7. requirements.txt +5 -2
Dockerfile CHANGED
@@ -57,6 +57,6 @@ RUN pip install --no-cache-dir -r requirements.txt
57
  # Copy application code
58
  COPY . .
59
 
60
-
61
  # Start the realtime.py script
62
  CMD ["python", "interface.py"]
 
57
  # Copy application code
58
  COPY . .
59
 
60
+ EXPOSE 7863
61
  # Start the realtime.py script
62
  CMD ["python", "interface.py"]
README.md CHANGED
@@ -7,85 +7,14 @@ sdk: gradio
7
  sdk_version: "5.12.0"
8
  app_file: app.py
9
  pinned: false
10
- python_verson: 3.11
 
11
  ---
12
 
13
- # Gradio Podcast Generator
14
 
15
- This project is a Gradio application that generates a podcast episode based on content scraped from a provided URL. It utilizes various components to scrape the content, generate a podcast script, and convert the script into audio format.
16
 
17
- ## Project Structure
18
 
19
- ```
20
- gradio-podcast-generator
21
- ├── src
22
- │ ├── app.py # Entry point of the Gradio application
23
- │ ├── scraper.py # Contains the scraping logic
24
- │ ├── podcast_generator.py # Generates the podcast episode
25
- │ └── tts.py # Converts text to speech
26
- ├── requirements.txt # Lists project dependencies
27
- └── README.md # Project documentation
28
- ```
29
-
30
- ## Setup Instructions
31
-
32
- 1. Clone the repository:
33
- ```
34
- git clone https://github.com/yourusername/gradio-podcast-generator.git
35
- cd gradio-podcast-generator
36
- ```
37
-
38
- 2. Install the required dependencies:
39
- ```
40
- pip install -r requirements.txt
41
- ```
42
-
43
- ## Flask Configuration
44
-
45
- The application uses Flask with Gradio integration. Here's how to set it up and run it:
46
-
47
- 1. Install the requirements:
48
- ```bash
49
- pip install flask gradio
50
- ```
51
-
52
- 2. Configure the environment:
53
- ```bash
54
- export FLASK_APP=app.py
55
- export FLASK_ENV=development # For development mode
56
- ```
57
-
58
- 3. Run the application:
59
- ```bash
60
- python app.py
61
- ```
62
-
63
- The server will start on `http://0.0.0.0:7860` with the following configuration:
64
- - Host: 0.0.0.0 (accessible from any IP)
65
- - Port: 7860
66
- - Debug mode: Enabled
67
- - Gradio interface: Mounted at root path '/'
68
-
69
- ## Usage
70
-
71
- To run the Gradio application, execute the following command in your terminal:
72
-
73
- ```
74
- python src/app.py
75
- ```
76
-
77
- Once the application is running, you can input a URL into the Gradio interface. The application will scrape the content from the URL, generate a podcast episode, and provide an audio output.
78
-
79
- ## Functionality
80
-
81
- - **Scraping**: The application uses the `scraper.py` module to extract relevant text from the provided URL.
82
- - **Podcast Generation**: The `podcast_generator.py` module utilizes the `eva-unit-01/eva-llama-3.33-70b` model to create a podcast script that is no longer than 3 minutes.
83
- - **Text-to-Speech**: The `tts.py` module converts the generated podcast script into audio using the ElevenLabs TTS API.
84
-
85
- ## Contributing
86
-
87
- Contributions are welcome! Please feel free to submit a pull request or open an issue for any enhancements or bug fixes.
88
-
89
- ## License
90
-
91
- This project is licensed under the MIT License. See the LICENSE file for more details.
 
7
  sdk_version: "5.12.0"
8
  app_file: app.py
9
  pinned: false
10
+ short_description: A Podcast Generator powered by FastAPI and Gradio
11
+ python_version: "3.12"
12
  ---
13
 
14
+ # URL to Podcast Generator
15
 
16
+ A FastAPI application with Gradio interface for generating podcasts from web content.
17
 
18
+ ## Running the Application
19
 
20
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
api_clients.py CHANGED
@@ -2,8 +2,10 @@ from functools import lru_cache
2
  from typing import List, Tuple, Optional
3
  import aiohttp
4
  import elevenlabs
 
5
  from contextlib import asynccontextmanager
6
  from logger import setup_logger, log_execution_time, log_async_execution_time
 
7
 
8
  logger = setup_logger("api_clients")
9
 
@@ -12,18 +14,31 @@ class OpenRouterClient:
12
 
13
  def __init__(self, api_key: str):
14
  logger.info("Initializing OpenRouter client")
15
- if not api_key or len(api_key) < 32:
16
- logger.error("Invalid API key format")
17
- raise ValueError("Invalid OpenRouter API key")
18
-
19
  self.api_key = api_key
20
  self.base_url = "https://openrouter.ai/api/v1"
21
  self.headers = {
22
  "Authorization": f"Bearer {api_key}",
23
- "Content-Type": "application/json",
24
  }
25
  logger.debug("OpenRouter client initialized successfully")
26
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  @asynccontextmanager
28
  async def get_session(self):
29
  logger.debug("Creating new aiohttp session")
@@ -33,7 +48,7 @@ class OpenRouterClient:
33
  @lru_cache(maxsize=1)
34
  async def get_models(self) -> List[Tuple[str, str]]:
35
  """
36
- Fetch available models from OpenRouter API
37
 
38
  Returns:
39
  List of tuples containing (model_id, model_description)
@@ -44,16 +59,11 @@ class OpenRouterClient:
44
  logger.info("Fetching available models from OpenRouter")
45
  async with self.get_session() as session:
46
  async with session.get(f"{self.base_url}/models") as response:
47
- if response.status != 200:
48
- error_msg = await response.text()
49
- logger.error(f"Failed to fetch models: {error_msg}")
50
- raise ValueError(f"Failed to fetch models: {error_msg}")
51
-
52
- models = await response.json()
53
  logger.info(f"Successfully fetched {len(models)} models")
54
- logger.debug(f"Available models: {[model['name'] for model in models]}")
55
- return [(model['id'], f"{model['name']} ({model['context_length']} tokens)")
56
- for model in models]
57
 
58
  @log_async_execution_time(logger)
59
  async def generate_script(self, content: str, prompt: str, model_id: str) -> str:
@@ -75,77 +85,53 @@ class OpenRouterClient:
75
  raise ValueError("Please provide a more detailed prompt")
76
 
77
  try:
 
 
 
 
 
 
 
 
78
  async with self.get_session() as session:
79
- logger.debug("Preparing script generation request")
80
- response = await self._make_script_request(session, content, prompt, model_id)
81
-
82
- script = response['choices'][0]['message']['content']
83
- logger.info(f"Script generated successfully: {len(script)} chars")
84
- logger.debug(f"Script preview: {script[:200]}...")
85
-
86
- return script
87
  except Exception as e:
88
  logger.error(f"Script generation failed", exc_info=True)
89
  raise
90
 
91
- async def _make_script_request(self, session, content, prompt, model_id):
92
- async with session.post(
93
- f"{self.base_url}/chat/completions",
94
- json={
95
- "model": model_id,
96
- "messages": [
97
- {
98
- "role": "system",
99
- "content": "You are an expert podcast script writer. Create engaging, conversational content."
100
- },
101
- {
102
- "role": "user",
103
- "content": f"""Based on this content: {content}
104
- Create a 3-minute podcast script focusing on: {prompt}
105
- Format as a natural conversation with clear speaker parts.
106
- Include [HOST] and [GUEST] markers for different voices."""
107
- }
108
- ]
109
- }
110
- ) as response:
111
- logger.debug("Sending script generation request")
112
-
113
- if response.status != 200:
114
- error_msg = await response.text()
115
- logger.error(f"Script generation failed: {error_msg}")
116
- raise ValueError(f"Script generation failed: {error_msg}")
117
-
118
- return await response.json()
119
-
120
  class ElevenLabsClient:
121
- """Handles ElevenLabs API interactions with detailed performance tracking"""
122
-
123
  def __init__(self, api_key: str):
124
- logger.info("Initializing ElevenLabs client")
125
  self.api_key = api_key
126
  elevenlabs.set_api_key(api_key)
127
-
128
- @lru_cache(maxsize=1)
129
  def get_voices(self) -> List[Tuple[str, str]]:
130
  """
131
- Fetch available voices from ElevenLabs
132
 
133
  Returns:
134
- List of tuples containing (voice_id, voice_name)
135
- """
136
- logger.info("Fetching available voices from ElevenLabs")
137
- voices = elevenlabs.voices()
138
- logger.info(f"Successfully fetched {len(voices)} voices")
139
- logger.debug(f"Available voices: {[voice.name for voice in voices]}")
140
- return [(voice.voice_id, voice.name) for voice in voices]
141
-
142
- @log_execution_time(logger)
143
- def generate_audio(self, text: str, voice_id: str) -> bytes:
144
- """
145
- Generate audio with comprehensive error handling and quality checks
146
-
147
- Logs detailed metrics about the input text and resulting audio.
148
  """
 
 
 
 
 
 
 
 
 
 
 
 
 
149
  logger.info(f"Starting audio generation with voice: {voice_id}")
150
  logger.debug(f"Input text length: {len(text)} chars")
151
 
@@ -154,7 +140,7 @@ class ElevenLabsClient:
154
 
155
  try:
156
  start_time = time.time()
157
- audio = elevenlabs.generate(
158
  text=text,
159
  voice=voice_id,
160
  model="eleven_monolingual_v1"
 
2
  from typing import List, Tuple, Optional
3
  import aiohttp
4
  import elevenlabs
5
+ import time
6
  from contextlib import asynccontextmanager
7
  from logger import setup_logger, log_execution_time, log_async_execution_time
8
+ from models import OpenRouterRequest, OpenRouterResponse, Message, OpenRouterModel
9
 
10
  logger = setup_logger("api_clients")
11
 
 
14
 
15
  def __init__(self, api_key: str):
16
  logger.info("Initializing OpenRouter client")
 
 
 
 
17
  self.api_key = api_key
18
  self.base_url = "https://openrouter.ai/api/v1"
19
  self.headers = {
20
  "Authorization": f"Bearer {api_key}",
21
+ "Content-Type": "application/json"
22
  }
23
  logger.debug("OpenRouter client initialized successfully")
24
 
25
+ @property
26
+ def api_key(self):
27
+ return self._api_key
28
+
29
+ @api_key.setter
30
+ def api_key(self, value: str):
31
+ if not value or len(value) < 32:
32
+ logger.error("Invalid API key format")
33
+ raise ValueError("Invalid OpenRouter API key")
34
+ self._api_key = value
35
+ # Update headers when API key changes
36
+ self.headers = {
37
+ "Authorization": f"Bearer {value}",
38
+ "Content-Type": "application/json",
39
+ }
40
+ logger.info("OpenRouter API key updated successfully")
41
+
42
  @asynccontextmanager
43
  async def get_session(self):
44
  logger.debug("Creating new aiohttp session")
 
48
  @lru_cache(maxsize=1)
49
  async def get_models(self) -> List[Tuple[str, str]]:
50
  """
51
+ Fetch available models from OpenRouter API using pydantic models
52
 
53
  Returns:
54
  List of tuples containing (model_id, model_description)
 
59
  logger.info("Fetching available models from OpenRouter")
60
  async with self.get_session() as session:
61
  async with session.get(f"{self.base_url}/models") as response:
62
+ response.raise_for_status()
63
+ data = await response.json()
64
+ models = [OpenRouterModel(**model) for model in data["data"]]
 
 
 
65
  logger.info(f"Successfully fetched {len(models)} models")
66
+ return [(model.id, model.name) for model in models]
 
 
67
 
68
  @log_async_execution_time(logger)
69
  async def generate_script(self, content: str, prompt: str, model_id: str) -> str:
 
85
  raise ValueError("Please provide a more detailed prompt")
86
 
87
  try:
88
+ request = OpenRouterRequest(
89
+ model=model_id,
90
+ messages=[
91
+ Message(role="system", content="You are a podcast script writer."),
92
+ Message(role="user", content=f"Create a podcast script from this content: {content}")
93
+ ]
94
+ )
95
+
96
  async with self.get_session() as session:
97
+ async with session.post(
98
+ f"{self.base_url}/chat/completions",
99
+ json=request.dict()
100
+ ) as response:
101
+ response.raise_for_status()
102
+ data = await response.json()
103
+ router_response = OpenRouterResponse(**data)
104
+ return router_response.choices[0].message.content
105
  except Exception as e:
106
  logger.error(f"Script generation failed", exc_info=True)
107
  raise
108
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
109
  class ElevenLabsClient:
 
 
110
  def __init__(self, api_key: str):
 
111
  self.api_key = api_key
112
  elevenlabs.set_api_key(api_key)
113
+
 
114
  def get_voices(self) -> List[Tuple[str, str]]:
115
  """
116
+ Synchronously get available voices from ElevenLabs
117
 
118
  Returns:
119
+ List of tuples containing (voice_id, display_name)
120
+ where display_name shows the name and description but not the ID
 
 
 
 
 
 
 
 
 
 
 
 
121
  """
122
+ try:
123
+ voices = elevenlabs.voices()
124
+ return [(
125
+ voice.voice_id, # Value (hidden from user)
126
+ f"{voice.name} ({voice.labels.get('accent', 'No accent')})" +
127
+ (f" - {voice.description[:50]}..." if voice.description else "")
128
+ ) for voice in voices]
129
+ except Exception as e:
130
+ logger.error("Failed to fetch voices from ElevenLabs", exc_info=True)
131
+ raise
132
+
133
+ async def generate_audio(self, text: str, voice_id: str):
134
+ """Asynchronously generate audio"""
135
  logger.info(f"Starting audio generation with voice: {voice_id}")
136
  logger.debug(f"Input text length: {len(text)} chars")
137
 
 
140
 
141
  try:
142
  start_time = time.time()
143
+ audio = await elevenlabs.generate( # Assuming elevenlabs supports async
144
  text=text,
145
  voice=voice_id,
146
  model="eleven_monolingual_v1"
app.py CHANGED
@@ -1,38 +1,120 @@
1
- from flask import Flask, render_template
 
2
  import gradio as gr
 
 
 
3
  from scraper import scrape_url
4
- from podcast_generator import PodcastGenerator
5
- from tts import text_to_speech
6
-
7
- app = Flask(__name__)
8
-
9
- def generate_podcast(url):
10
- content = scrape_url(url)
11
- podcast_generator = PodcastGenerator()
12
- podcast_text = podcast_generator.generate_podcast(content)
13
- audio_file = text_to_speech(podcast_text)
14
- return audio_file
15
-
16
- # Create Gradio interface
17
- demo = gr.Interface(
18
- fn=generate_podcast,
19
- inputs=gr.Textbox(
20
- label="Website URL",
21
- placeholder="Enter the URL of the website you want to convert to a podcast"
22
- ),
23
- outputs=gr.Audio(label="Generated Podcast"),
24
- title="URL to Podcast Generator",
25
- description="Enter a URL to generate a podcast episode based on its content.",
26
- theme="huggingface",
27
- allow_flagging="never",
28
- )
29
-
30
- # Mount Gradio interface to Flask
31
- app = gr.mount_gradio_app(app, demo, path="/")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
 
33
  if __name__ == "__main__":
34
- app.run(
35
- host="0.0.0.0",
36
- port=7860,
37
- debug=True
38
- )
 
1
+ import asyncio
2
+ import os
3
  import gradio as gr
4
+ from api_clients import OpenRouterClient, ElevenLabsClient
5
+ from logger import setup_logger
6
+ from config import Config
7
  from scraper import scrape_url
8
+
9
+ logger = setup_logger("app")
10
+
11
+ # Default choices for dropdowns
12
+ default_voices = [("", "Enter API key to load voices")]
13
+ default_models = [("", "Enter API key to load models")]
14
+
15
+ class PodcasterUI:
16
+ def __init__(self, config: Config):
17
+ self.config = config
18
+ self.router_client = OpenRouterClient(os.getenv('OPENROUTER_API_KEY', ''))
19
+ self.elevenlabs_client = ElevenLabsClient(os.getenv('ELEVENLABS_API_KEY', ''))
20
+ # Store models and voices as instance variables
21
+ self.models = default_models
22
+ self.voices = default_voices
23
+
24
+ async def initialize(self):
25
+ """Initialize API clients and fetch models/voices"""
26
+ try:
27
+ self.models = await self.router_client.get_models()
28
+ # Since get_voices() might not be async, remove await
29
+ self.voices = self.elevenlabs_client.get_voices()
30
+ logger.info(f"Initialized with {len(self.voices)} voices and {len(self.models)} models")
31
+ except Exception as e:
32
+ logger.error("Failed to initialize API clients", exc_info=True)
33
+ raise
34
+
35
+ async def on_submit(self, content: str, model_id: str, voice_id: str, prompt: str = "") -> tuple:
36
+ """Handle form submission with async API calls"""
37
+ try:
38
+ # First scrape the webpage content
39
+ webpage_content = scrape_url(content)
40
+ if not webpage_content:
41
+ return "Failed to extract content from URL", None
42
+
43
+ # Generate script using the scraped content
44
+ script = await self.router_client.generate_script(webpage_content, prompt, model_id)
45
+
46
+ # Generate audio from the script
47
+ audio = await self.elevenlabs_client.generate_audio(script, voice_id)
48
+ return script, audio
49
+ except Exception as e:
50
+ logger.error("Failed to generate podcast", exc_info=True)
51
+ return str(e), None
52
+
53
+ def create_ui(self) -> gr.Interface:
54
+ with gr.Blocks(title='URL to Podcast Generator', theme='huggingface') as interface:
55
+ gr.Markdown('# URL to Podcast Generator')
56
+ gr.Markdown('Enter a URL to generate a podcast episode based on its content.')
57
+
58
+ with gr.Row():
59
+ with gr.Column(scale=2):
60
+ url_input = gr.Textbox(
61
+ label="Website URL",
62
+ placeholder="Enter the URL of the website you want to convert to a podcast"
63
+ )
64
+
65
+ with gr.Row():
66
+ with gr.Column():
67
+ openrouter_model = gr.Dropdown(
68
+ label='AI Model',
69
+ choices=[(name, id) for id, name in self.models], # Swap order for display
70
+ value=self.models[0][1] if len(self.models) > 1 else None,
71
+ type="index" # Use index to get the second element (id) from tuple
72
+ )
73
+
74
+ with gr.Column():
75
+ voice_model = gr.Dropdown(
76
+ label='Voice',
77
+ choices=[(name, id) for id, name in self.voices], # Swap order for display
78
+ value=self.voices[0][1] if len(self.voices) > 1 else None,
79
+ type="index" # Use index to get the second element (id) from tuple
80
+ )
81
+
82
+ prompt_input = gr.Textbox(
83
+ label="Custom Prompt",
84
+ placeholder="Enter a custom prompt to guide the podcast generation (optional)",
85
+ lines=3
86
+ )
87
+
88
+ submit_btn = gr.Button('Generate Podcast', variant='primary')
89
+
90
+ with gr.Column(scale=1):
91
+ script_output = gr.Textbox(label="Generated Script", interactive=False)
92
+ audio_output = gr.Audio(label="Generated Podcast")
93
+ status = gr.Textbox(label='Status', interactive=False)
94
+
95
+ submit_btn.click(
96
+ fn=self.on_submit,
97
+ inputs=[url_input, openrouter_model, voice_model, prompt_input],
98
+ outputs=[script_output, audio_output]
99
+ )
100
+
101
+ return interface
102
+
103
+ def main():
104
+ config = Config()
105
+ app = PodcasterUI(config)
106
+
107
+ # Initialize before creating UI
108
+ loop = asyncio.get_event_loop()
109
+ loop.run_until_complete(app.initialize())
110
+
111
+ # Create UI with populated data
112
+ interface = app.create_ui()
113
+ interface.launch(
114
+ server_name="0.0.0.0",
115
+ server_port=7860,
116
+ share=True
117
+ )
118
 
119
  if __name__ == "__main__":
120
+ main()
 
 
 
 
config.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from dotenv import load_dotenv
3
+
4
+ class Config:
5
+ def __init__(self):
6
+ load_dotenv()
7
+ self.openrouter_api_key = os.getenv('OPENROUTER_API_KEY', '')
8
+ self.elevenlabs_api_key = os.getenv('ELEVENLABS_API_KEY', '')
models.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pydantic import BaseModel, Field
2
+ from typing import List, Optional
3
+
4
+ class Message(BaseModel):
5
+ role: str
6
+ content: str
7
+
8
+ class OpenRouterRequest(BaseModel):
9
+ model: str
10
+ messages: List[Message]
11
+
12
+ class Choice(BaseModel):
13
+ message: Message
14
+ index: int = 0
15
+ finish_reason: Optional[str] = None
16
+
17
+ class OpenRouterResponse(BaseModel):
18
+ id: str
19
+ choices: List[Choice]
20
+ model: str
21
+
22
+ class OpenRouterModel(BaseModel):
23
+ id: str
24
+ name: str
25
+ description: Optional[str] = None
requirements.txt CHANGED
@@ -1,10 +1,13 @@
1
  gradio==3.0.0
2
  browser-use
3
  elevenlabs==0.2.26
4
- flask==3.0.0
5
  pydub==0.25.1 # audio processing library
6
  python-dotenv==1.0.0 # for environment variables
7
  requests==2.31.0 # for API calls
8
  numpy>1.24.3 # common dependency
9
  openrouter
10
- trafilatura>=1.6.1
 
 
 
 
 
1
  gradio==3.0.0
2
  browser-use
3
  elevenlabs==0.2.26
 
4
  pydub==0.25.1 # audio processing library
5
  python-dotenv==1.0.0 # for environment variables
6
  requests==2.31.0 # for API calls
7
  numpy>1.24.3 # common dependency
8
  openrouter
9
+ uvicorn
10
+ fastapi
11
+ langchain_anthropic
12
+ langchain_openai
13
+ langchain_google_genai