yoon2566 committed on
Commit
8e48050
·
verified ·
1 Parent(s): f640b97

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +99 -38
app.py CHANGED
@@ -1,65 +1,126 @@
1
  import gradio as gr
2
  from youtube_transcript_api import YouTubeTranscriptApi
3
- from youtube_transcript_api._errors import NoTranscriptFound, TranscriptsDisabled
4
- import re
 
 
 
 
 
 
 
 
 
 
5
 
6
  def extract_video_id(url):
7
  """YouTube URLμ—μ„œ λΉ„λ””μ˜€ IDλ₯Ό μΆ”μΆœν•˜λŠ” ν•¨μˆ˜"""
8
  video_id = None
9
- if 'youtube.com/watch?v=' in url:
10
- video_id = url.split('youtube.com/watch?v=')[1][:11]
11
- elif 'youtu.be/' in url:
12
- video_id = url.split('youtu.be/')[1][:11]
13
- return video_id
 
 
 
 
 
 
 
 
 
 
 
14
 
15
  def get_transcript(url):
16
  """YouTube μ˜μƒμ˜ 슀크립트λ₯Ό μΆ”μΆœν•˜λŠ” ν•¨μˆ˜"""
17
  try:
 
18
  video_id = extract_video_id(url)
19
  if not video_id:
20
  return "μ˜¬λ°”λ₯Έ YouTube URL을 μž…λ ₯ν•΄μ£Όμ„Έμš”."
21
 
 
 
 
 
 
 
 
 
 
22
  try:
23
- # λ¨Όμ € ν•œκ΅­μ–΄ μžλ§‰ μ‹œλ„
24
- transcript_list = YouTubeTranscriptApi.get_transcript(video_id, languages=['ko'])
25
- except NoTranscriptFound:
26
- try:
27
- # ν•œκ΅­μ–΄ μžλ§‰μ΄ μ—†μœΌλ©΄ μ˜μ–΄ μžλ§‰ μ‹œλ„
28
- transcript_list = YouTubeTranscriptApi.get_transcript(video_id, languages=['en'])
29
- except NoTranscriptFound:
30
  try:
31
- # 일반 μžλ§‰μ΄ μ—†μœΌλ©΄ μžλ™ μƒμ„±λœ μžλ§‰ μ‹œλ„
32
- transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
33
- transcript = transcript_list.find_generated_transcript(['ko', 'en'])
34
- transcript_list = transcript.fetch()
35
  except:
36
- return "이 μ˜μƒμ—λŠ” μžλ§‰μ΄ μ—†κ±°λ‚˜ μžλ§‰μ„ κ°€μ Έμ˜¬ 수 μ—†μŠ΅λ‹ˆλ‹€."
37
-
38
- # 전체 슀크립트 ν…μŠ€νŠΈ ꡬ성
39
- full_transcript = ""
40
- for transcript in transcript_list:
41
- text = transcript['text']
42
- timestamp = transcript['start']
43
- minutes = int(timestamp // 60)
44
- seconds = int(timestamp % 60)
45
- full_transcript += f"[{minutes:02d}:{seconds:02d}] {text}\n"
46
-
47
- return full_transcript
48
-
49
- except TranscriptsDisabled:
50
- return "이 μ˜μƒμ€ μžλ§‰μ΄ λΉ„ν™œμ„±ν™”λ˜μ–΄ μžˆμŠ΅λ‹ˆλ‹€."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
  except Exception as e:
52
- return f"였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {str(e)}"
53
 
54
  # Gradio μΈν„°νŽ˜μ΄μŠ€ 생성
55
  iface = gr.Interface(
56
  fn=get_transcript,
57
- inputs=gr.Textbox(label="YouTube URL을 μž…λ ₯ν•˜μ„Έμš”"),
58
- outputs=gr.Textbox(label="μΆ”μΆœλœ 슀크립트", lines=10),
 
 
59
  title="YouTube 슀크립트 μΆ”μΆœκΈ°",
60
- description="YouTube μ˜μƒμ˜ URL을 μž…λ ₯ν•˜λ©΄ μžλ™μœΌλ‘œ 슀크립트λ₯Ό μΆ”μΆœν•©λ‹ˆλ‹€. (일반 μžλ§‰ λ˜λŠ” μžλ™ μƒμ„±λœ μžλ§‰)"
 
 
 
 
 
 
 
 
 
61
  )
62
 
 
 
 
 
63
  # μ• ν”Œλ¦¬μΌ€μ΄μ…˜ μ‹€ν–‰
64
  if __name__ == "__main__":
65
- iface.launch()
 
1
  import gradio as gr
2
  from youtube_transcript_api import YouTubeTranscriptApi
3
+ from youtube_transcript_api._errors import NoTranscriptFound, TranscriptsDisabled, NoTranscriptAvailable
4
+ import requests
5
+ import json
6
+ import os
7
+
8
def get_video_info(video_id):
    """Fetch public metadata for a YouTube video from the oEmbed endpoint.

    Args:
        video_id: YouTube video ID to look up.

    Returns:
        The decoded JSON body of the oEmbed response, or None when the
        request fails, the server answers with an error status, or the
        body is not valid JSON.
    """
    try:
        response = requests.get(
            'https://www.youtube.com/oembed',
            # Passing the query through ``params`` percent-encodes the nested
            # watch URL, so an odd video_id cannot inject extra parameters.
            params={
                'url': f'http://www.youtube.com/watch?v={video_id}',
                'format': 'json',
            },
            # Without a timeout a stalled connection would block the caller
            # indefinitely.
            timeout=10,
        )
        response.raise_for_status()
        return response.json()
    except (requests.RequestException, ValueError):
        # Best-effort lookup: the caller treats None as "info unavailable".
        return None
15
 
16
  def extract_video_id(url):
17
  """YouTube URLμ—μ„œ λΉ„λ””μ˜€ IDλ₯Ό μΆ”μΆœν•˜λŠ” ν•¨μˆ˜"""
18
  video_id = None
19
+ try:
20
+ if 'youtube.com/watch?v=' in url:
21
+ video_id = url.split('youtube.com/watch?v=')[1].split('&')[0][:11]
22
+ elif 'youtu.be/' in url:
23
+ video_id = url.split('youtu.be/')[1].split('?')[0][:11]
24
+ return video_id
25
+ except:
26
+ return None
27
+
28
def get_available_transcripts(video_id):
    """Return the transcript listing for a video, or None if it cannot be read.

    Args:
        video_id: YouTube video ID.

    Returns:
        Whatever ``YouTubeTranscriptApi.list_transcripts`` returns, or None
        when that call raises.
    """
    try:
        return YouTubeTranscriptApi.list_transcripts(video_id)
    except Exception:
        # Best-effort: the caller falls back to a direct fetch on None.
        # ``Exception`` (not a bare except) lets KeyboardInterrupt and
        # SystemExit propagate.
        return None
35
 
36
  def get_transcript(url):
37
  """YouTube μ˜μƒμ˜ 슀크립트λ₯Ό μΆ”μΆœν•˜λŠ” ν•¨μˆ˜"""
38
  try:
39
+ # λΉ„λ””μ˜€ ID μΆ”μΆœ
40
  video_id = extract_video_id(url)
41
  if not video_id:
42
  return "μ˜¬λ°”λ₯Έ YouTube URL을 μž…λ ₯ν•΄μ£Όμ„Έμš”."
43
 
44
+ # μ˜μƒ 정보 확인
45
+ video_info = get_video_info(video_id)
46
+ if not video_info:
47
+ return "μ˜μƒ 정보λ₯Ό κ°€μ Έμ˜¬ 수 μ—†μŠ΅λ‹ˆλ‹€. URL을 ν™•μΈν•΄μ£Όμ„Έμš”."
48
+
49
+ # μžλ§‰ μΆ”μΆœ μ‹œλ„
50
+ transcript_list = None
51
+ transcript_data = None
52
+
53
  try:
54
+ # λͺ¨λ“  κ°€λŠ₯ν•œ μžλ§‰ λͺ©λ‘ κ°€μ Έμ˜€κΈ°
55
+ transcript_list = get_available_transcripts(video_id)
56
+
57
+ if transcript_list:
58
+ # 1. μˆ˜λ™ ν•œκ΅­μ–΄ μžλ§‰ μ‹œλ„
 
 
59
  try:
60
+ transcript = transcript_list.find_manually_created_transcript(['ko'])
61
+ transcript_data = transcript.fetch()
 
 
62
  except:
63
+ # 2. μžλ™ μƒμ„±λœ ν•œκ΅­μ–΄ μžλ§‰ μ‹œλ„
64
+ try:
65
+ transcript = transcript_list.find_generated_transcript(['ko'])
66
+ transcript_data = transcript.fetch()
67
+ except:
68
+ # 3. μ˜μ–΄ μžλ§‰ μ‹œλ„ (μˆ˜λ™ λ˜λŠ” μžλ™)
69
+ try:
70
+ transcript = transcript_list.find_transcript(['en'])
71
+ transcript_data = transcript.fetch()
72
+ except:
73
+ return "μ§€μ›λ˜λŠ” μžλ§‰μ„ 찾을 수 μ—†μŠ΅λ‹ˆλ‹€."
74
+
75
+ if not transcript_data:
76
+ # 직접 API둜 μ‹œλ„
77
+ transcript_data = YouTubeTranscriptApi.get_transcript(video_id, languages=['ko', 'en'])
78
+
79
+ # 전체 슀크립트 ν…μŠ€νŠΈ ꡬ성
80
+ full_transcript = f"제λͺ©: {video_info.get('title', '제λͺ© μ—†μŒ')}\n\n"
81
+ for transcript in transcript_data:
82
+ text = transcript['text']
83
+ timestamp = transcript['start']
84
+ minutes = int(timestamp // 60)
85
+ seconds = int(timestamp % 60)
86
+ full_transcript += f"[{minutes:02d}:{seconds:02d}] {text}\n"
87
+
88
+ return full_transcript
89
+
90
+ except NoTranscriptAvailable:
91
+ return "이 μ˜μƒμ—λŠ” μžλ§‰μ΄ μ—†μŠ΅λ‹ˆλ‹€."
92
+ except TranscriptsDisabled:
93
+ return "이 μ˜μƒμ€ μžλ§‰μ΄ λΉ„ν™œμ„±ν™”λ˜μ–΄ μžˆμŠ΅λ‹ˆλ‹€."
94
+ except Exception as e:
95
+ return f"μžλ§‰ μΆ”μΆœ 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {str(e)}"
96
+
97
  except Exception as e:
98
+ return f"처리 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {str(e)}"
99
 
100
  # Gradio μΈν„°νŽ˜μ΄μŠ€ 생성
101
  iface = gr.Interface(
102
  fn=get_transcript,
103
+ inputs=[
104
+ gr.Textbox(label="YouTube URL을 μž…λ ₯ν•˜μ„Έμš”", placeholder="https://www.youtube.com/watch?v=...")
105
+ ],
106
+ outputs=gr.Textbox(label="μΆ”μΆœλœ 슀크립트", lines=20),
107
  title="YouTube 슀크립트 μΆ”μΆœκΈ°",
108
+ description="""
109
+ YouTube μ˜μƒμ˜ URL을 μž…λ ₯ν•˜λ©΄ μžλ™μœΌλ‘œ 슀크립트λ₯Ό μΆ”μΆœν•©λ‹ˆλ‹€.
110
+ - ν•œκ΅­μ–΄ μžλ§‰ (μˆ˜λ™/μžλ™)
111
+ - μ˜μ–΄ μžλ§‰ (μˆ˜λ™/μžλ™)
112
+ 을 순차적으둜 μ‹œλ„ν•©λ‹ˆλ‹€.
113
+ """,
114
+ examples=[
115
+ ["https://www.youtube.com/watch?v=example1"],
116
+ ["https://youtu.be/example2"]
117
+ ]
118
  )
119
 
120
+ # ν™˜κ²½λ³€μˆ˜ μ„€μ •
121
+ os.environ['GRADIO_SERVER_NAME'] = "0.0.0.0"
122
+ os.environ['GRADIO_SERVER_PORT'] = "7860"
123
+
124
  # μ• ν”Œλ¦¬μΌ€μ΄μ…˜ μ‹€ν–‰
125
  if __name__ == "__main__":
126
+ iface.launch(server_name="0.0.0.0", server_port=7860)