marks committed on
Commit 0ebcd8e · 1 Parent(s): 340bc5b

More changes

Files changed (6)
  1. README.md +53 -11
  2. app.py +35 -0
  3. podcast_generator.py +9 -0
  4. requirements.txt +9 -38
  5. scraper.py +24 -0
  6. tts.py +23 -0
README.md CHANGED
@@ -1,11 +1,53 @@
- ---
- title: Podcaster
- emoji: πŸ‘
- colorFrom: indigo
- colorTo: gray
- sdk: docker
- pinned: false
- short_description: A Podcast Generator
- ---
-
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ # Gradio Podcast Generator
+
+ This project is a Gradio application that generates a podcast episode from content scraped from a provided URL. It scrapes the page, generates a podcast script, and converts the script into audio.
+
+ ## Project Structure
+
+ ```
+ gradio-podcast-generator
+ ├── app.py                  # Entry point of the Gradio application
+ ├── scraper.py              # Contains the scraping logic
+ ├── podcast_generator.py    # Generates the podcast episode
+ ├── tts.py                  # Converts text to speech
+ ├── requirements.txt        # Lists project dependencies
+ └── README.md               # Project documentation
+ ```
+
+ ## Setup Instructions
+
+ 1. Clone the repository:
+    ```
+    git clone https://github.com/yourusername/gradio-podcast-generator.git
+    cd gradio-podcast-generator
+    ```
+
+ 2. Install the required dependencies:
+    ```
+    pip install -r requirements.txt
+    ```
+
+ ## Usage
+
+ To run the Gradio application, execute the following command in your terminal:
+
+ ```
+ python app.py
+ ```
+
+ Once the application is running, enter a URL in the Gradio interface. The application will scrape the content from the URL, generate a podcast episode, and return the audio.
+
+ ## Functionality
+
+ - **Scraping**: The `scraper.py` module extracts the relevant text from the provided URL.
+ - **Podcast Generation**: The `podcast_generator.py` module uses the `eva-unit-01/eva-llama-3.33-70b` model to create a podcast script intended to run no longer than about three minutes when read aloud.
+ - **Text-to-Speech**: The `tts.py` module converts the generated podcast script into audio using the ElevenLabs TTS API.
+
+ ## Contributing
+
+ Contributions are welcome! Please feel free to submit a pull request or open an issue for any enhancements or bug fixes.
+
+ ## License
+
+ This project is licensed under the MIT License. See the LICENSE file for more details.
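
Note on configuration: the setup steps above do not cover credentials, but the pipeline described under Functionality calls both a hosted language model and the ElevenLabs TTS API, and each needs an API key. Since the updated `requirements.txt` includes `python-dotenv`, one way to supply them is a local `.env` file; a minimal sketch, assuming hypothetical variable names that are not defined anywhere in this commit:

```python
# Hypothetical configuration loader; the environment variable names are illustrative only.
import os
from dotenv import load_dotenv

load_dotenv()  # reads key=value pairs from a local .env file into the process environment

ELEVENLABS_API_KEY = os.environ["ELEVENLABS_API_KEY"]  # consumed by tts.text_to_speech
MODEL_API_KEY = os.environ["MODEL_API_KEY"]            # consumed by whichever client backs PodcastGenerator
```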
app.py ADDED
@@ -0,0 +1,35 @@
+ from flask import Flask
+ import gradio as gr
+ from scraper import scrape_url
+ from podcast_generator import PodcastGenerator
+ from tts import text_to_speech
+
+ app = Flask(__name__)
+
+ def generate_podcast(url):
+     content = scrape_url(url)
+     podcast_generator = PodcastGenerator()
+     podcast_text = podcast_generator.generate_podcast(content)
+     audio_file = text_to_speech(podcast_text)
+     return audio_file
+
+ iface = gr.Interface(
+     fn=generate_podcast,
+     inputs=gr.Textbox(
+         label="Website URL",
+         placeholder="Enter the URL of the website you want to convert to a podcast"
+     ),
+     outputs=gr.Audio(label="Generated Podcast"),
+     title="URL to Podcast Generator",
+     description="Enter a URL to generate a podcast episode based on its content.",
+     theme="huggingface",
+     allow_flagging="never",
+ )
+
+ if __name__ == "__main__":
+     iface.launch(
+         server_name="0.0.0.0",
+         server_port=7860,
+         share=True,
+         debug=True
+     )
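
As committed, `generate_podcast` constructs `PodcastGenerator()` without the `model_client` argument its constructor requires, and calls `text_to_speech(podcast_text)` without the `api_key` parameter that `tts.py` declares; the Flask `app` object is also created but never used. A minimal sketch of the wiring with those dependencies made explicit, assuming keys in environment variables and a hypothetical `OpenRouterClient` (sketched after `podcast_generator.py` below):

```python
# Sketch only, not the committed code: wires the modules together with explicit dependencies.
import os

from scraper import scrape_url
from podcast_generator import PodcastGenerator
from tts import text_to_speech
from model_client import OpenRouterClient  # hypothetical module, sketched below

def generate_podcast(url):
    content = scrape_url(url)
    generator = PodcastGenerator(OpenRouterClient(os.environ["MODEL_API_KEY"]))
    podcast_text = generator.generate_podcast(content)
    return text_to_speech(podcast_text, os.environ["ELEVENLABS_API_KEY"])
```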
podcast_generator.py ADDED
@@ -0,0 +1,9 @@
+ class PodcastGenerator:
+     def __init__(self, model_client):
+         self.model_client = model_client
+
+     def generate_podcast(self, scraped_content):
+         prompt = f"Create a podcast episode based on the following content: {scraped_content}"
+         response = self.model_client.generate(prompt, max_length=300) # Assuming 300 tokens is roughly 3 minutes
+         podcast_text = response.get('text', '')
+         return podcast_text.strip()
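
`PodcastGenerator` expects a `model_client` exposing `generate(prompt, max_length=...)` and returning a dict with a `'text'` key, but no such client is defined in this commit; `requirements.txt` lists `openrouter`, so one plausible backing is OpenRouter's chat-completions endpoint. A sketch under that assumption follows (the model name comes from the README; everything else is illustrative). Note also that 300 tokens is closer to one or two minutes of narration at a typical ~150 words per minute, so a three-minute episode likely needs a larger budget.

```python
# Hypothetical client matching the generate(prompt, max_length) interface PodcastGenerator expects.
# It calls OpenRouter's OpenAI-compatible chat-completions endpoint; adjust if the provider differs.
import requests

class OpenRouterClient:
    def __init__(self, api_key, model="eva-unit-01/eva-llama-3.33-70b"):
        self.api_key = api_key
        self.model = model

    def generate(self, prompt, max_length=300):
        response = requests.post(
            "https://openrouter.ai/api/v1/chat/completions",
            headers={"Authorization": f"Bearer {self.api_key}"},
            json={
                "model": self.model,
                "messages": [{"role": "user", "content": prompt}],
                "max_tokens": max_length,
            },
            timeout=120,
        )
        response.raise_for_status()
        body = response.json()
        # Return a dict with a 'text' key so response.get('text', '') works upstream.
        return {"text": body["choices"][0]["message"]["content"]}
```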
requirements.txt CHANGED
@@ -1,38 +1,9 @@
- # Core AI and Language Models
- langchain-openai>=0.0.1
- langchain-google-genai>=0.0.1
- langchain-anthropic>=0.0.1
- langchain-ollama>=0.0.1
-
- # Web and Browser Automation
- playwright>=1.40.0
- browser-use>=0.1.20
- aiohttp>=3.8.0
- requests>=2.31.0
-
- # Audio Generation
- elevenlabs>=0.1.0
-
- # Data Handling and Validation
- pydantic>=2.0.0
- dataclasses>=0.6
- typing-extensions>=4.8.0
-
- # Configuration and Environment
- python-dotenv>=1.0.0
-
- # Error Handling and Logging
- python-json-logger>=2.0.0 # Structured JSON logging
- rich>=13.0.0 # For console output formatting
- backoff>=2.2.0 # Retry mechanism
- tenacity>=8.2.0 # Retry utilities
-
- # UI and Interface
- gradio>=4.0.0 # For better themes and UI components
-
- # Async Support
- asyncio>=3.4.3
-
- # Testing and Development
- pytest>=7.0.0 # For running tests
- mypy>=1.0.0 # For type checking
+ gradio==3.0.0
+ browser-use
+ elevenlabs==0.2.26
+ flask==3.0.0
+ pydub==0.25.1 # audio processing library
+ python-dotenv==1.0.0 # for environment variables
+ requests==2.31.0 # for API calls
+ numpy>1.24.3 # common dependency
+ openrouter
scraper.py ADDED
@@ -0,0 +1,24 @@
+ def scrape_url(url):
+     from browser_use import Browser
+     from bs4 import BeautifulSoup
+
+     # Initialize the browser
+     browser = Browser()
+
+     # Open the URL
+     browser.open(url)
+
+     # Get the page content
+     content = browser.get_page_source()
+
+     # Close the browser
+     browser.close()
+
+     # Parse the HTML content
+     soup = BeautifulSoup(content, 'html.parser')
+
+     # Extract relevant text (modify the selector as needed)
+     text_elements = soup.find_all(['main'])
+     text_content = ' '.join([element.get_text() for element in text_elements])
+
+     return text_content.strip()
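
Two caveats on the scraper: it imports `bs4`, but `beautifulsoup4` is not listed in the updated `requirements.txt`, and the synchronous `Browser().open()` / `get_page_source()` / `close()` calls may not match the actual `browser_use` interface, which is primarily asynchronous. For static pages, a plain `requests` + BeautifulSoup fallback avoids the browser dependency entirely; a minimal sketch (it will not render JavaScript-heavy pages):

```python
# Fallback scraper sketch using requests + BeautifulSoup only.
import requests
from bs4 import BeautifulSoup

def scrape_url(url):
    response = requests.get(url, timeout=30)
    response.raise_for_status()

    soup = BeautifulSoup(response.text, "html.parser")

    # Prefer <main>/<article> if present, otherwise fall back to the whole document.
    text_elements = soup.find_all(["main", "article"]) or [soup]
    text_content = " ".join(
        element.get_text(separator=" ", strip=True) for element in text_elements
    )
    return text_content.strip()
```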
tts.py ADDED
@@ -0,0 +1,23 @@
+ def text_to_speech(text, api_key):
+     import requests
+
+     url = "https://api.elevenlabs.io/v1/text-to-speech"
+     headers = {
+         "Authorization": f"Bearer {api_key}",
+         "Content-Type": "application/json"
+     }
+     data = {
+         "text": text,
+         "voice": "en_us_male", # Specify the desired voice
+         "output_format": "mp3" # Specify the desired output format
+     }
+
+     response = requests.post(url, headers=headers, json=data)
+
+     if response.status_code == 200:
+         audio_content = response.content
+         with open("podcast_episode.mp3", "wb") as audio_file:
+             audio_file.write(audio_content)
+         return "podcast_episode.mp3"
+     else:
+         raise Exception(f"Error: {response.status_code}, {response.text}")
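
The request above may not match the ElevenLabs REST API: as documented at the time of writing, the voice ID goes in the URL path (`/v1/text-to-speech/{voice_id}`) and authentication uses an `xi-api-key` header rather than a Bearer token. Since `requirements.txt` pins `elevenlabs==0.2.26`, another option is that SDK's helper functions; a sketch under that assumption (the voice name and output filename are illustrative):

```python
# Sketch using the elevenlabs 0.2.x SDK helpers; voice and filename are illustrative assumptions.
from elevenlabs import generate, save, set_api_key

def text_to_speech(text, api_key, output_path="podcast_episode.mp3"):
    set_api_key(api_key)
    audio = generate(text=text, voice="Bella", model="eleven_monolingual_v1")
    save(audio, output_path)
    return output_path
```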