"""Helpers for turning a YouTube URL into the video's transcript text."""

from __future__ import annotations

from urllib.parse import parse_qs, urlparse

from youtube_transcript_api import YouTubeTranscriptApi

# Hostnames that serve the regular ``/watch?v=<id>`` style URLs.
_YOUTUBE_HOSTS = frozenset(
    {"www.youtube.com", "youtube.com", "m.youtube.com", "music.youtube.com"}
)

# Leading path segments that are directly followed by the video ID,
# e.g. ``/embed/<id>`` or ``/shorts/<id>``.
_PATH_ID_PREFIXES = frozenset({"embed", "shorts", "live", "v"})


def extract_youtube_video_id(url: str) -> str:
    """Extract the video ID from a YouTube URL.

    Recognised forms:

    * ``https://www.youtube.com/watch?v=<id>`` (also on ``youtube.com``,
      ``m.youtube.com`` and ``music.youtube.com``)
    * ``https://www.youtube.com/embed/<id>``, ``/shorts/<id>``,
      ``/live/<id>`` and ``/v/<id>``
    * ``https://youtu.be/<id>``

    Args:
        url: The full URL, including its scheme (``https://...``).

    Returns:
        The video ID as it appears in the URL.

    Raises:
        ValueError: If the host is not a recognised YouTube host or no
            video ID can be found in the URL.
    """
    parsed = urlparse(url)
    host = parsed.hostname
    # Drop empty segments so leading, trailing and doubled slashes are ignored.
    segments = [part for part in parsed.path.split("/") if part]

    video_id = ""
    if host in _YOUTUBE_HOSTS:
        # ``parse_qs`` omits blank values, so ``?v=`` is treated as missing.
        query_ids = parse_qs(parsed.query).get("v")
        if query_ids:
            video_id = query_ids[0]
        elif len(segments) >= 2 and segments[0] in _PATH_ID_PREFIXES:
            video_id = segments[1]
    elif host == "youtu.be" and segments:
        # Short links carry the ID as the first path segment.
        video_id = segments[0]

    if not video_id:
        raise ValueError("Invalid YouTube URL")
    return video_id


def download_transcript(url: str) -> str:
    """Download the transcript of a YouTube video as one string.

    The video ID is printed to stdout before the download starts.

    Args:
        url: A YouTube URL accepted by ``extract_youtube_video_id``.

    Returns:
        The ``text`` of every transcript entry, joined with single spaces.

    Raises:
        ValueError: If ``url`` is not a recognised YouTube URL.

    Exceptions raised by ``youtube_transcript_api`` are not caught here and
    propagate to the caller unchanged.
    """
    video_id = extract_youtube_video_id(url)
    print(f"Downloading transcript for video ID: {video_id}")

    # Newer youtube-transcript-api releases replace the static
    # ``get_transcript()`` with an instance-level ``fetch()``; support both so
    # the function keeps working across library versions.
    if hasattr(YouTubeTranscriptApi, "get_transcript"):
        entries = YouTubeTranscriptApi.get_transcript(video_id)
    else:
        entries = YouTubeTranscriptApi().fetch(video_id).to_raw_data()

    return " ".join(entry["text"] for entry in entries)