Spaces:
Sleeping
Sleeping
gcs
Browse files
app.py
CHANGED
@@ -67,20 +67,62 @@ def init_gcs_client(service_account_key_string):
|
|
67 |
gcs_client = storage.Client(credentials=credentials, project=credentials_dict['project_id'])
|
68 |
return gcs_client
|
69 |
|
70 |
-
def
|
71 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
72 |
bucket = gcs_client.bucket(bucket_name)
|
73 |
blob = bucket.blob(destination_blob_name)
|
74 |
blob.upload_from_filename(source_file_name)
|
75 |
-
print(f"{source_file_name} uploaded to {destination_blob_name}.")
|
76 |
|
77 |
-
def
|
78 |
-
"""从 GCS
|
79 |
bucket = gcs_client.bucket(bucket_name)
|
80 |
blob = bucket.blob(source_blob_name)
|
81 |
-
blob.
|
82 |
-
print(f"{source_blob_name} downloaded to {destination_file_name}.")
|
83 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
84 |
|
85 |
# # ====drive====初始化Google Drive服务
|
86 |
def init_drive_service():
|
@@ -278,6 +320,8 @@ def get_transcript(video_id):
|
|
278 |
|
279 |
def process_transcript_and_screenshots(video_id):
|
280 |
print("====process_transcript_and_screenshots====")
|
|
|
|
|
281 |
service = init_drive_service()
|
282 |
parent_folder_id = '1GgI4YVs0KckwStVQkLa1NZ8IpaEMurkL'
|
283 |
folder_id = create_folder_if_not_exists(service, video_id, parent_folder_id)
|
@@ -316,6 +360,16 @@ def process_transcript_and_screenshots(video_id):
|
|
316 |
update_file_on_drive(service, file_id, updated_transcript_text)
|
317 |
print("逐字稿已更新,包括截图链接")
|
318 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
319 |
return transcript
|
320 |
|
321 |
def process_youtube_link(link):
|
|
|
67 |
gcs_client = storage.Client(credentials=credentials, project=credentials_dict['project_id'])
|
68 |
return gcs_client
|
69 |
|
70 |
+
def gcs_create_bucket_folder_if_not_exists(gcs_client, bucket_name, folder_name):
    """Create an empty marker object that stands in for a folder, if missing.

    The function checks whether an object named exactly ``folder_name``
    exists in the bucket and, when it does not, uploads an empty object
    under that name to simulate a folder (prefix).

    Args:
        gcs_client: Storage client exposing ``bucket(name)``.
        bucket_name: Name of the target bucket.
        folder_name: Object name used as the folder marker. It is used
            verbatim; no trailing slash is appended.
    """
    bucket = gcs_client.bucket(bucket_name)
    blob = bucket.blob(folder_name)
    if blob.exists():
        print(f"GCS Folder '{folder_name}' already exists.")
        return

    # Zero-byte upload: only the object's name matters, not its content.
    blob.upload_from_string(
        '',
        content_type='application/x-www-form-urlencoded;charset=UTF-8',
    )
    print(f"GCS Folder '{folder_name}' created.")
|
79 |
+
|
80 |
+
def gcs_check_folder_exists(gcs_client, bucket_name, folder_name):
    """Return True if at least one object name starts with ``folder_name``.

    Args:
        gcs_client: Storage client exposing ``bucket(name)``.
        bucket_name: Name of the bucket to inspect.
        folder_name: Prefix to look for. It is passed to ``list_blobs``
            unchanged, so any object whose name begins with this string
            counts as a match.

    Returns:
        bool: True when the prefix matches at least one object.
    """
    bucket = gcs_client.bucket(bucket_name)
    # Only existence matters, so ask for a single result instead of
    # materialising every object under the prefix.
    matches = bucket.list_blobs(prefix=folder_name, max_results=1)
    return next(iter(matches), None) is not None
|
85 |
+
|
86 |
+
def gcs_check_file_exists(gcs_client, bucket_name, file_name):
    """Return whether an object named ``file_name`` exists in the bucket.

    Args:
        gcs_client: Storage client exposing ``bucket(name)``.
        bucket_name: Name of the bucket to inspect.
        file_name: Full object name, in the form
            ``{folder_name}/{file_name}``.

    Returns:
        The result of ``blob.exists()`` for that object.
    """
    bucket = gcs_client.bucket(bucket_name)
    return bucket.blob(file_name).exists()
|
94 |
+
|
95 |
+
def upload_file_to_gcs(gcs_client, bucket_name, destination_blob_name, source_file_name):
    """Upload a local file to the given bucket.

    Args:
        gcs_client: Storage client exposing ``bucket(name)``.
        bucket_name: Name of the destination bucket.
        destination_blob_name: Object name to write in the bucket.
        source_file_name: Path of the local file to upload.
    """
    bucket = gcs_client.bucket(bucket_name)
    target_blob = bucket.blob(destination_blob_name)
    target_blob.upload_from_filename(source_file_name)
    print(f"File {source_file_name} uploaded to {destination_blob_name}.")
|
101 |
|
102 |
+
def download_blob_to_string(gcs_client, bucket_name, source_blob_name):
    """Download an object's content from the bucket and return it as text.

    Args:
        gcs_client: Storage client exposing ``bucket(name)``.
        bucket_name: Name of the bucket holding the object.
        source_blob_name: Name of the object to read.

    Returns:
        Whatever ``blob.download_as_text()`` returns for that object.
    """
    bucket = gcs_client.bucket(bucket_name)
    source_blob = bucket.blob(source_blob_name)
    return source_blob.download_as_text()
|
|
|
107 |
|
108 |
+
def make_blob_public(gcs_client, bucket_name, blob_name):
    """Make the given object publicly readable and print its public URL.

    Args:
        gcs_client: Storage client exposing ``bucket(name)``.
        bucket_name: Name of the bucket holding the object.
        blob_name: Name of the object to expose.
    """
    bucket = gcs_client.bucket(bucket_name)
    blob = bucket.blob(blob_name)
    blob.make_public()
    print(f"Blob {blob_name} is now publicly accessible at {blob.public_url}")
|
114 |
+
|
115 |
+
def copy_all_files_from_drive_to_gcs(drive_service, gcs_client, drive_folder_id, bucket_name, gcs_folder_name):
    """Copy every non-trashed file in a Drive folder into a GCS "folder".

    Each file is copied with ``copy_file_from_drive_to_gcs`` to the object
    name ``{gcs_folder_name}/{file_name}``.

    Args:
        drive_service: Drive API service exposing ``files().list(...)``.
        gcs_client: Storage client forwarded to the per-file copy helper.
        drive_folder_id: ID of the Drive folder whose children are copied.
        bucket_name: Name of the destination bucket.
        gcs_folder_name: Prefix placed in front of each copied file name.
    """
    query = f"'{drive_folder_id}' in parents and trashed = false"
    page_token = None
    while True:
        # files.list is paginated; keep requesting pages until the API
        # stops returning a continuation token, otherwise files beyond the
        # first page would be silently skipped.
        response = drive_service.files().list(
            q=query,
            fields="nextPageToken, files(id, name)",
            pageToken=page_token,
        ).execute()
        for drive_file in response.get('files', []):
            gcs_destination_path = f"{gcs_folder_name}/{drive_file['name']}"
            copy_file_from_drive_to_gcs(
                drive_service,
                gcs_client,
                drive_file['id'],
                bucket_name,
                gcs_destination_path,
            )
        page_token = response.get('nextPageToken')
        if not page_token:
            break
|
126 |
|
127 |
# # ====drive====初始化Google Drive服务
|
128 |
def init_drive_service():
|
|
|
320 |
|
321 |
def process_transcript_and_screenshots(video_id):
|
322 |
print("====process_transcript_and_screenshots====")
|
323 |
+
|
324 |
+
# Drive
|
325 |
service = init_drive_service()
|
326 |
parent_folder_id = '1GgI4YVs0KckwStVQkLa1NZ8IpaEMurkL'
|
327 |
folder_id = create_folder_if_not_exists(service, video_id, parent_folder_id)
|
|
|
360 |
update_file_on_drive(service, file_id, updated_transcript_text)
|
361 |
print("逐字稿已更新,包括截图链接")
|
362 |
|
363 |
+
# init gcs client
|
364 |
+
gcs_client = init_gcs_client(GCS_KEY)
|
365 |
+
bucket_name = 'video_ai_assistant'
|
366 |
+
# 检查 folder 是否存在
|
367 |
+
is_gcs_exists = gcs_check_folder_exists(gcs_client, bucket_name, video_id)
|
368 |
+
if not is_gcs_exists:
|
369 |
+
gcs_create_bucket_folder_if_not_exists(gcs_client, bucket_name, video_id)
|
370 |
+
copy_all_files_from_drive_to_gcs(service, gcs_client, folder_id, bucket_name, video_id)
|
371 |
+
print("Drive file 已上传到GCS")
|
372 |
+
|
373 |
return transcript
|
374 |
|
375 |
def process_youtube_link(link):
|