Spaces:
Running
Running
from youtube_transcript_api._errors import NoTranscriptFound
Browse files
app.py
CHANGED
@@ -6,7 +6,10 @@ from docx import Document
|
|
6 |
import os
|
7 |
from openai import OpenAI
|
8 |
import json
|
|
|
9 |
from youtube_transcript_api import YouTubeTranscriptApi
|
|
|
|
|
10 |
|
11 |
from moviepy.editor import VideoFileClip
|
12 |
from pytube import YouTube
|
@@ -218,6 +221,16 @@ def extract_youtube_id(url):
|
|
218 |
else:
|
219 |
return None
|
220 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
221 |
def process_transcript_and_screenshots(video_id):
|
222 |
print("====process_transcript_and_screenshots====")
|
223 |
service = init_drive_service()
|
@@ -229,7 +242,11 @@ def process_transcript_and_screenshots(video_id):
|
|
229 |
exists, file_id = check_file_exists(service, folder_id, file_name)
|
230 |
if not exists:
|
231 |
# 从YouTube获取逐字稿并上传
|
232 |
-
transcript =
|
|
|
|
|
|
|
|
|
233 |
transcript_text = json.dumps(transcript, ensure_ascii=False, indent=2)
|
234 |
file_id = upload_content_directly(service, file_name, folder_id, transcript_text)
|
235 |
print("逐字稿已上传到Google Drive")
|
|
|
6 |
import os
|
7 |
from openai import OpenAI
|
8 |
import json
|
9 |
+
|
10 |
from youtube_transcript_api import YouTubeTranscriptApi
|
11 |
+
from youtube_transcript_api._errors import NoTranscriptFound
|
12 |
+
|
13 |
|
14 |
from moviepy.editor import VideoFileClip
|
15 |
from pytube import YouTube
|
|
|
221 |
else:
|
222 |
return None
|
223 |
|
224 |
+
def get_transcript(video_id):
    """Fetch a YouTube transcript in the first available preferred language.

    Args:
        video_id: The YouTube video identifier to look up.

    Returns:
        Whatever ``YouTubeTranscriptApi.get_transcript`` returns for the
        first language in the priority list that has a transcript, or
        ``None`` when none of the preferred languages is available.

    Raises:
        Any exception other than ``NoTranscriptFound`` raised by the
        transcript API is propagated unchanged to the caller.
    """
    # Language priority list, most preferred first.
    languages = ['zh-TW', 'zh-Hant', 'en']
    try:
        # The API resolves `languages` in descending priority by itself, so a
        # single call replaces one network request per candidate language.
        return YouTubeTranscriptApi.get_transcript(video_id, languages=languages)
    except NoTranscriptFound:
        # None of the preferred languages has a transcript.
        return None
|
233 |
+
|
234 |
def process_transcript_and_screenshots(video_id):
|
235 |
print("====process_transcript_and_screenshots====")
|
236 |
service = init_drive_service()
|
|
|
242 |
exists, file_id = check_file_exists(service, folder_id, file_name)
|
243 |
if not exists:
|
244 |
# 从YouTube获取逐字稿并上传
|
245 |
+
transcript = get_transcript(video_id)
|
246 |
+
if transcript:
|
247 |
+
print("成功獲取字幕")
|
248 |
+
else:
|
249 |
+
print("沒有找到字幕")
|
250 |
transcript_text = json.dumps(transcript, ensure_ascii=False, indent=2)
|
251 |
file_id = upload_content_directly(service, file_name, folder_id, transcript_text)
|
252 |
print("逐字稿已上传到Google Drive")
|