# NOTE: the web file viewer's header (file size 5,463 bytes, per-line commit hashes, line-number gutter) was residue, not program text; the script starts below.
import os
import re
import tempfile
from datetime import timedelta

import browser_cookie3
import gradio as gr
import yt_dlp
def get_cookies():
    """Export the local browser's YouTube cookies to a Netscape cookie file.

    Chrome's cookie store is tried first and Firefox's second.

    Returns:
        Path of a newly created temporary cookie file, or ``None`` when
        neither browser's cookies could be read. The caller owns the file
        and is responsible for deleting it.

    Raises:
        OSError: If the temporary file cannot be created or written.
    """
    youtube_domain = '.youtube.com'

    # Reading a browser profile can fail in many ways (browser not installed,
    # locked database, keyring errors), so this stays best-effort. Catching
    # Exception rather than using a bare except lets KeyboardInterrupt and
    # SystemExit propagate.
    try:
        # Try Chrome cookies first
        cookies = browser_cookie3.chrome(domain_name=youtube_domain)
    except Exception:
        try:
            # Try Firefox cookies if Chrome fails
            cookies = browser_cookie3.firefox(domain_name=youtube_domain)
        except Exception:
            return None

    # A unique, owner-only temp file keeps concurrent requests from
    # overwriting or deleting each other's cookies and avoids exposing
    # session cookies at a predictable path.
    fd, cookie_path = tempfile.mkstemp(prefix='youtube_cookies_', suffix='.txt')
    try:
        with os.fdopen(fd, 'w', encoding='utf-8') as f:
            # Netscape-format loaders expect this magic first line.
            f.write('# Netscape HTTP Cookie File\n')
            for cookie in cookies:
                if cookie.domain != youtube_domain:
                    continue
                # Session cookies have no expiry; the format stores that as an
                # empty field, not the text "None".
                expires = '' if cookie.expires is None else int(cookie.expires)
                secure = 'TRUE' if cookie.secure else 'FALSE'
                f.write(
                    f"{cookie.domain}\tTRUE\t{cookie.path}\t"
                    f"{secure}\t{expires}\t"
                    f"{cookie.name}\t{cookie.value}\n"
                )
    except Exception:
        # Do not leave a partially written cookie file behind.
        os.remove(cookie_path)
        raise
    return cookie_path
def extract_transcript(url):
"""Extract transcript from YouTube video using yt-dlp with cookies"""
try:
# Get cookies
cookie_path = get_cookies()
ydl_opts = {
'writesubtitles': True,
'writeautomaticsub': True,
'subtitleslangs': ['ko', 'en'],
'skip_download': True,
'quiet': True
}
# Add cookies if available
if cookie_path:
ydl_opts['cookiefile'] = cookie_path
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
# Get video info
info = ydl.extract_info(url, download=False)
# Get available subtitles
subtitles = info.get('subtitles', {})
automatic_captions = info.get('automatic_captions', {})
# Format output
formatted_output = f"μ λͺ©: {info.get('title', 'μ λͺ© μμ')}\n\n"
# Process subtitles
subtitle_found = False
# Priority order for subtitles
subtitle_priorities = [
('ko', subtitles), # Manual Korean
('ko', automatic_captions), # Auto Korean
('en', subtitles), # Manual English
('en', automatic_captions) # Auto English
]
for lang, sub_dict in subtitle_priorities:
if lang in sub_dict and not subtitle_found:
subs = sub_dict[lang]
if isinstance(subs, list) and subs:
subtitle_found = True
# Process each subtitle entry
for entry in subs:
if 'src' in entry: # JSON format
lines = entry['src'].split('\n')
current_time = None
current_text = []
for line in lines:
# Time stamp line
if re.match(r'\d{2}:\d{2}:\d{2}', line):
if current_time and current_text:
formatted_output += f"[{current_time}] {''.join(current_text)}\n"
time_parts = line.split(':')
current_time = f"{time_parts[1]}:{time_parts[2].split('.')[0]}"
current_text = []
# Text line
elif line.strip() and not line.startswith('WEBVTT'):
current_text.append(line.strip() + ' ')
# Add last subtitle
if current_time and current_text:
formatted_output += f"[{current_time}] {''.join(current_text)}\n"
break
if not subtitle_found:
return "μλ§μ μ°Ύμ μ μμ΅λλ€. (μλ μμ± μλ§ ν¬ν¨)"
# Clean up cookie file
if cookie_path and os.path.exists(cookie_path):
os.remove(cookie_path)
return formatted_output
except Exception as e:
error_msg = str(e)
if "Sign in to confirm your age" in error_msg:
return "μ°λ Ή μ νμ΄ μλ μμμ
λλ€."
elif "confirm you're not a bot" in error_msg:
return "YouTubeκ° λ΄ λ°©μ§λ₯Ό μν΄ μΈμ¦μ μꡬν©λλ€. μ μ ν λ€μ μλν΄μ£ΌμΈμ."
return f"μλ§ μΆμΆ μ€ μ€λ₯κ° λ°μνμ΅λλ€: {error_msg}"
# Create Gradio interface
iface = gr.Interface(
fn=extract_transcript,
inputs=gr.Textbox(
label="YouTube URL",
placeholder="https://www.youtube.com/watch?v=..."
),
outputs=gr.Textbox(
label="μΆμΆλ μ€ν¬λ¦½νΈ",
lines=20
),
title="YouTube μλ§ μΆμΆκΈ°",
description="""
YouTube μμμ URLμ μ
λ ₯νλ©΄ μλ§μ μΆμΆν©λλ€.
- νκ΅μ΄ μλ§ μ°μ (μλ > μλ)
- μμ΄ μλ§ μ°¨μ (μλ > μλ)
""",
allow_flagging="never"
)
# Launch the app
if __name__ == "__main__":
iface.launch(server_name="0.0.0.0") |