Spaces:
Running
Running
folder_id = create_folder_if_not_exists(service, video_id, parent_folder_id)
Browse files
app.py
CHANGED
@@ -14,6 +14,8 @@ import os
|
|
14 |
|
15 |
from google.oauth2 import service_account
|
16 |
from googleapiclient.discovery import build
|
|
|
|
|
17 |
|
18 |
from urllib.parse import urlparse, parse_qs
|
19 |
|
@@ -55,6 +57,24 @@ def init_drive_service():
|
|
55 |
service = build('drive', 'v3', credentials=credentials)
|
56 |
return service
|
57 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
58 |
# 检查Google Drive上是否存在文件
|
59 |
def check_file_exists(service, folder_name, file_name):
|
60 |
query = f"name = '{file_name}' and '{folder_name}' in parents and trashed = false"
|
@@ -136,17 +156,20 @@ def process_youtube_link(link):
|
|
136 |
# 假设您已经获取了 YouTube 视频的逐字稿并存储在变量 `transcript` 中
|
137 |
video_id = extract_youtube_id(link)
|
138 |
service = init_drive_service()
|
139 |
-
|
|
|
|
|
|
|
140 |
file_name = f"{video_id}_transcript.txt"
|
141 |
|
142 |
# 检查逐字稿是否存在
|
143 |
-
exists, file_id = check_file_exists(service,
|
144 |
if not exists:
|
145 |
# 获取逐字稿
|
146 |
transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=['en'])
|
147 |
transcript_text = "\n".join([f"{item['start']}: {item['text']}" for item in transcript])
|
148 |
# 上传到Google Drive
|
149 |
-
upload_to_drive(service, file_name,
|
150 |
print("逐字稿已上传到Google Drive")
|
151 |
else:
|
152 |
print("逐字稿已存在于Google Drive中")
|
|
|
14 |
|
15 |
from google.oauth2 import service_account
|
16 |
from googleapiclient.discovery import build
|
17 |
+
from googleapiclient.http import MediaFileUpload
|
18 |
+
|
19 |
|
20 |
from urllib.parse import urlparse, parse_qs
|
21 |
|
|
|
57 |
service = build('drive', 'v3', credentials=credentials)
|
58 |
return service
|
59 |
|
60 |
+
def create_folder_if_not_exists(service, folder_name, parent_id):
    """Return the ID of the Drive folder *folder_name* under *parent_id*, creating it if absent.

    Args:
        service: Drive v3 service object (as returned by ``build('drive', 'v3', ...)``).
        folder_name: Name of the folder to look up or create.
        parent_id: ID of the parent folder to search in / create under.

    Returns:
        The ID of the first matching non-trashed folder, or the ID of the
        newly created folder when no match exists.
    """
    folder_mime = 'application/vnd.google-apps.folder'

    def _quote(value):
        # Drive query string literals are single-quoted; a backslash or a
        # single quote inside the value must be backslash-escaped, otherwise
        # the query is malformed (or matches something unintended).
        return str(value).replace("\\", "\\\\").replace("'", "\\'")

    query = (
        f"mimeType='{folder_mime}' and name='{_quote(folder_name)}' "
        f"and '{_quote(parent_id)}' in parents and trashed=false"
    )
    response = service.files().list(
        q=query, spaces='drive', fields="files(id, name)"
    ).execute()
    folders = response.get('files', [])

    if folders:
        # Folder already exists: reuse the first match.
        return folders[0]['id']

    # Folder does not exist: create it under the given parent.
    file_metadata = {
        'name': folder_name,
        'mimeType': folder_mime,
        'parents': [parent_id],
    }
    folder = service.files().create(body=file_metadata, fields='id').execute()
    return folder.get('id')
|
77 |
+
|
78 |
# 检查Google Drive上是否存在文件
|
79 |
def check_file_exists(service, folder_name, file_name):
|
80 |
query = f"name = '{file_name}' and '{folder_name}' in parents and trashed = false"
|
|
|
156 |
# 假设您已经获取了 YouTube 视频的逐字稿并存储在变量 `transcript` 中
|
157 |
video_id = extract_youtube_id(link)
|
158 |
service = init_drive_service()
|
159 |
+
parent_folder_id = '1GgI4YVs0KckwStVQkLa1NZ8IpaEMurkL' # youtube逐字稿圖檔的ID
|
160 |
+
|
161 |
+
# 检查/创建视频ID命名的子文件夹
|
162 |
+
folder_id = create_folder_if_not_exists(service, video_id, parent_folder_id)
|
163 |
file_name = f"{video_id}_transcript.txt"
|
164 |
|
165 |
# 检查逐字稿是否存在
|
166 |
+
exists, file_id = check_file_exists(service, folder_id, file_name)
|
167 |
if not exists:
|
168 |
# 获取逐字稿
|
169 |
transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=['en'])
|
170 |
transcript_text = "\n".join([f"{item['start']}: {item['text']}" for item in transcript])
|
171 |
# 上传到Google Drive
|
172 |
+
upload_to_drive(service, file_name, folder_id, transcript_text)
|
173 |
print("逐字稿已上传到Google Drive")
|
174 |
else:
|
175 |
print("逐字稿已存在于Google Drive中")
|