Spaces:

JunyiAcademy
/

vaitor2

Running

App Files Community

youngtsai committed on Apr 20, 2024

Commit

68393b8

1 Parent(s): 910748f

GCS_SERVICE refactor

Browse files

Files changed (1) hide show

app.py +36 -116

app.py CHANGED Viewed

@@ -93,87 +93,6 @@ def verify_password(password):
         raise gr.Error("密碼錯誤")
 # ====gcs====
-def gcs_check_file_exists(gcs_client, bucket_name, file_name):
-    """
-    检查 GCS 存储桶中是否存在指定的文件
-    file_name 格式：{folder_name}/{file_name}
-    """
-    bucket = gcs_client.bucket(bucket_name)
-    blob = bucket.blob(file_name)
-    return blob.exists()
-def upload_file_to_gcs(gcs_client, bucket_name, destination_blob_name, file_path):
-    """上传文件到指定的 GCS 存储桶"""
-    bucket = gcs_client.bucket(bucket_name)
-    blob = bucket.blob(destination_blob_name)
-    blob.upload_from_filename(file_path)
-    print(f"File {file_path} uploaded to {destination_blob_name} in GCS.")
-def upload_file_to_gcs_with_json_string(gcs_client, bucket_name, destination_blob_name, json_string):
-    """上传字符串到指定的 GCS 存储桶"""
-    bucket = gcs_client.bucket(bucket_name)
-    blob = bucket.blob(destination_blob_name)
-    blob.upload_from_string(json_string)
-    print(f"JSON string uploaded to {destination_blob_name} in GCS.")
-def download_blob_to_string(gcs_client, bucket_name, source_blob_name):
-    """从 GCS 下载文件内容到字符串"""
-    bucket = gcs_client.bucket(bucket_name)
-    blob = bucket.blob(source_blob_name)
-    return blob.download_as_text()
-def make_blob_public(gcs_client, bucket_name, blob_name):
-    """将指定的 GCS 对象设置为公共可读"""
-    bucket = gcs_client.bucket(bucket_name)
-    blob = bucket.blob(blob_name)
-    blob.make_public()
-    print(f"Blob {blob_name} is now publicly accessible at {blob.public_url}")
-def get_blob_public_url(gcs_client, bucket_name, blob_name):
-    """获取指定 GCS 对象的公开 URL"""
-    bucket = gcs_client.bucket(bucket_name)
-    blob = bucket.blob(blob_name)
-    return blob.public_url
-def upload_img_and_get_public_url(gcs_client, bucket_name, file_name, file_path):
-    """上传图片到 GCS 并获取其公开 URL"""
-    # 上传图片
-    upload_file_to_gcs(gcs_client, bucket_name, file_name, file_path)
-    # 将上传的图片设置为公开
-    make_blob_public(gcs_client, bucket_name, file_name)
-    # 获取图片的公开 URL
-    public_url = get_blob_public_url(gcs_client, bucket_name, file_name)
-    print(f"Public URL for the uploaded image: {public_url}")
-    return public_url
-def copy_all_files_from_drive_to_gcs(drive_service, gcs_client, drive_folder_id, bucket_name, gcs_folder_name):
-    # Get all files from the folder
-    query = f"'{drive_folder_id}' in parents and trashed = false"
-    response = drive_service.files().list(q=query).execute()
-    files = response.get('files', [])
-    for file in files:
-        # Copy each file to GCS
-        file_id = file['id']
-        file_name = file['name']
-        gcs_destination_path = f"{gcs_folder_name}/{file_name}"
-        copy_file_from_drive_to_gcs(drive_service, gcs_client, file_id, bucket_name, gcs_destination_path)
-def copy_file_from_drive_to_gcs(drive_service, gcs_client, file_id, bucket_name, gcs_destination_path):
-    # Download file content from Drive
-    request = drive_service.files().get_media(fileId=file_id)
-    fh = io.BytesIO()
-    downloader = MediaIoBaseDownload(fh, request)
-    done = False
-    while not done:
-        status, done = downloader.next_chunk()
-    fh.seek(0)
-    file_content = fh.getvalue()
-    # Upload file content to GCS
-    bucket = gcs_client.bucket(bucket_name)
-    blob = bucket.blob(gcs_destination_path)
-    blob.upload_from_string(file_content)
-    print(f"File {file_id} copied to GCS at {gcs_destination_path}.")
 def delete_blob(gcs_client, bucket_name, blob_name):
     """删除指定的 GCS 对象"""
@@ -483,12 +402,13 @@ def process_transcript_and_screenshots_on_gcs(video_id):
             transcript = generate_transcription_by_whisper(video_id)
         transcript_text = json.dumps(transcript, ensure_ascii=False, indent=2)
-        upload_file_to_gcs_with_json_string(gcs_client, bucket_name, transcript_blob_name, transcript_text)
         is_new_transcript = True
     else:
         # 逐字稿已存在，下载逐字稿内容
         print("逐字稿已存在于GCS中")
-        transcript_text = download_blob_to_string(gcs_client, bucket_name, transcript_blob_name)
         transcript = json.loads(transcript_text)
     # print("===確認其他衍生文件===")
@@ -517,7 +437,7 @@ def process_transcript_and_screenshots_on_gcs(video_id):
             # 截图
             screenshot_path = screenshot_youtube_video(video_id, entry['start'])
             screenshot_blob_name = f"{video_id}/{video_id}_{entry['start']}.jpg"
-            img_file_id = upload_img_and_get_public_url(gcs_client, bucket_name, screenshot_blob_name, screenshot_path)
             entry['img_file_id'] = img_file_id
             print(f"截图已上传到GCS: {img_file_id}")
             is_new_transcript = True
@@ -529,7 +449,7 @@ def process_transcript_and_screenshots_on_gcs(video_id):
         print(transcript)
         print("===更新逐字稿文件===")
         updated_transcript_text = json.dumps(transcript, ensure_ascii=False, indent=2)
-        upload_file_to_gcs_with_json_string(gcs_client, bucket_name, transcript_blob_name, updated_transcript_text)
         print("逐字稿已更新，包括截图链接")
         updated_transcript_json = json.loads(updated_transcript_text)
     else:
@@ -723,12 +643,12 @@ def get_reading_passage(video_id, df_string, source):
             reading_passage = generate_reading_passage(df_string)
             reading_passage_json = {"reading_passage": str(reading_passage)}
             reading_passage_text = json.dumps(reading_passage_json, ensure_ascii=False, indent=2)
-            upload_file_to_gcs_with_json_string(gcs_client, bucket_name, blob_name, reading_passage_text)
             print("reading_passage已上传到GCS")
         else:
             # reading_passage已存在，下载内容
             print("reading_passage已存在于GCS中")
-            reading_passage_text = download_blob_to_string(gcs_client, bucket_name, blob_name)
             reading_passage_json = json.loads(reading_passage_text)
     elif source == "drive":
@@ -805,12 +725,12 @@ def get_mind_map(video_id, df_string, source):
             mind_map = generate_mind_map(df_string)
             mind_map_json = {"mind_map": str(mind_map)}
             mind_map_text = json.dumps(mind_map_json, ensure_ascii=False, indent=2)
-            upload_file_to_gcs_with_json_string(gcs_client, bucket_name, blob_name, mind_map_text)
             print("mind_map已上傳到GCS")
         else:
             # mindmap已存在，下载内容
             print("mind_map已存在于GCS中")
-            mind_map_text = download_blob_to_string(gcs_client, bucket_name, blob_name)
             mind_map_json = json.loads(mind_map_text)
     elif source == "drive":
@@ -889,12 +809,12 @@ def get_video_id_summary(video_id, df_string, source):
             summary = generate_summarise(df_string, meta_data)
             summary_json = {"summary": str(summary)}
             summary_text = json.dumps(summary_json, ensure_ascii=False, indent=2)
-            upload_file_to_gcs_with_json_string(gcs_client, bucket_name, summary_file_blob_name, summary_text)
             print("summary已上传到GCS")
         else:
             # summary已存在，下载内容
             print("summary已存在于GCS中")
-            summary_text = download_blob_to_string(gcs_client, bucket_name, summary_file_blob_name)
             summary_json = json.loads(summary_text)
     elif source == "drive":
@@ -1012,12 +932,12 @@ def get_questions(video_id, df_string, source="gcs"):
         if not is_questions_exists:
             questions = generate_questions(df_string)
             questions_text = json.dumps(questions, ensure_ascii=False, indent=2)
-            upload_file_to_gcs_with_json_string(gcs_client, bucket_name, blob_name, questions_text)
             print("questions已上傳到GCS")
         else:
             # 逐字稿已存在，下载逐字稿内容
             print("questions已存在于GCS中")
-            questions_text = download_blob_to_string(gcs_client, bucket_name, blob_name)
             questions = json.loads(questions_text)
     elif source == "drive":
@@ -1103,12 +1023,12 @@ def get_questions_answers(video_id, df_string, source="gcs"):
             if not is_questions_answers_exists:
                 questions_answers = generate_questions_answers(df_string)
                 questions_answers_text = json.dumps(questions_answers, ensure_ascii=False, indent=2)
-                upload_file_to_gcs_with_json_string(gcs_client, bucket_name, blob_name, questions_answers_text)
                 print("questions_answers已上傳到GCS")
             else:
                 # questions_answers已存在，下载内容
                 print("questions_answers已存在于GCS中")
-                questions_answers_text = download_blob_to_string(gcs_client, bucket_name, blob_name)
                 questions_answers = json.loads(questions_answers_text)
         except:
             questions = get_questions(video_id, df_string, source)
@@ -1202,12 +1122,12 @@ def get_key_moments(video_id, formatted_simple_transcript, formatted_transcript,
             key_moments = generate_key_moments(formatted_simple_transcript, formatted_transcript)
             key_moments_json = {"key_moments": key_moments}
             key_moments_text = json.dumps(key_moments_json, ensure_ascii=False, indent=2)
-            upload_file_to_gcs_with_json_string(gcs_client, bucket_name, blob_name, key_moments_text)
             print("key_moments已上傳到GCS")
         else:
             # key_moments已存在，下载内容
             print("key_moments已存在于GCS中")
-            key_moments_text = download_blob_to_string(gcs_client, bucket_name, blob_name)
             key_moments_json = json.loads(key_moments_text)
              # 檢查 key_moments 是否有 keywords
             print("===檢查 key_moments 是否有 keywords===")
@@ -1222,8 +1142,8 @@ def get_key_moments(video_id, formatted_simple_transcript, formatted_transcript,
                     has_keywords_added = True
             if has_keywords_added:
                 key_moments_text = json.dumps(key_moments_json, ensure_ascii=False, indent=2)
-                upload_file_to_gcs_with_json_string(gcs_client, bucket_name, blob_name, key_moments_text)
-                key_moments_text = download_blob_to_string(gcs_client, bucket_name, blob_name)
                 key_moments_json = json.loads(key_moments_text)
     elif source == "drive":
@@ -1545,7 +1465,7 @@ def get_LLM_content(video_id, kind):
     # 检查 file 是否存在
     is_file_exists = GCS_SERVICE.check_file_exists(bucket_name, blob_name)
     if is_file_exists:
-        content = download_blob_to_string(gcs_client, bucket_name, blob_name)
         content_json = json.loads(content)
         if kind == "reading_passage_latex":
             content_text = content_json["reading_passage"]
@@ -1569,7 +1489,7 @@ def delete_LLM_content(video_id, kind):
     # 检查 file 是否存在
     is_file_exists = GCS_SERVICE.check_file_exists(bucket_name, blob_name)
     if is_file_exists:
-        delete_blob(gcs_client, bucket_name, blob_name)
         print(f"{file_name}已从GCS中删除")
     return gr.update(value="", interactive=False)
@@ -1585,17 +1505,17 @@ def update_LLM_content(video_id, new_content, kind):
         print(new_content)
         reading_passage_json = {"reading_passage": str(new_content)}
         reading_passage_text = json.dumps(reading_passage_json, ensure_ascii=False, indent=2)
-        upload_file_to_gcs_with_json_string(gcs_client, bucket_name, blob_name, reading_passage_text)
         updated_content = new_content
     elif kind == "summary_markdown":
         summary_json = {"summary": str(new_content)}
         summary_text = json.dumps(summary_json, ensure_ascii=False, indent=2)
-        upload_file_to_gcs_with_json_string(gcs_client, bucket_name, blob_name, summary_text)
         updated_content = new_content
     elif kind == "mind_map":
         mind_map_json = {"mind_map": str(new_content)}
         mind_map_text = json.dumps(mind_map_json, ensure_ascii=False, indent=2)
-        upload_file_to_gcs_with_json_string(gcs_client, bucket_name, blob_name, mind_map_text)
         updated_content = mind_map_text
     elif kind == "key_moments":
         # from update_LLM_btn  -> new_content is a string
@@ -1606,7 +1526,7 @@ def update_LLM_content(video_id, new_content, kind):
             key_moments_list = new_content
         key_moments_json = {"key_moments": key_moments_list}
         key_moments_text = json.dumps(key_moments_json, ensure_ascii=False, indent=2)
-        upload_file_to_gcs_with_json_string(gcs_client, bucket_name, blob_name, key_moments_text)
         updated_content = key_moments_text
     elif kind == "transcript":
         if isinstance(new_content, str):
@@ -1614,7 +1534,7 @@ def update_LLM_content(video_id, new_content, kind):
         else:
             transcript_json = new_content
         transcript_text = json.dumps(transcript_json, ensure_ascii=False, indent=2)
-        upload_file_to_gcs_with_json_string(gcs_client, bucket_name, blob_name, transcript_text)
         updated_content = transcript_text
     elif kind == "questions":
         # from update_LLM_btn  -> new_content is a string
@@ -1624,7 +1544,7 @@ def update_LLM_content(video_id, new_content, kind):
         else:
             questions_json = new_content
         questions_text = json.dumps(questions_json, ensure_ascii=False, indent=2)
-        upload_file_to_gcs_with_json_string(gcs_client, bucket_name, blob_name, questions_text)
         updated_content = questions_text
     elif kind == "questions_answers":
         # from update_LLM_btn  -> new_content is a string
@@ -1634,7 +1554,7 @@ def update_LLM_content(video_id, new_content, kind):
         else:
             questions_answers_json = new_content
         questions_answers_text = json.dumps(questions_answers_json, ensure_ascii=False, indent=2)
-        upload_file_to_gcs_with_json_string(gcs_client, bucket_name, blob_name, questions_answers_text)
         updated_content = questions_answers_text
     print(f"{kind} 已更新到GCS")
@@ -1701,7 +1621,7 @@ def reading_passage_add_latex_version(video_id):
     # 逐字稿已存在，下载逐字稿内容
     print("reading_passage 已存在于GCS中，轉換 Latex 模式")
-    reading_passage_text = download_blob_to_string(gcs_client, bucket_name, blob_name)
     reading_passage_json = json.loads(reading_passage_text)
     original_reading_passage = reading_passage_json["reading_passage"]
     sys_content = "你是一個擅長資料分析跟影片教學的老師，user 為學生，請精讀資料文本，自行判斷資料的種類，使用 zh-TW"
@@ -1734,7 +1654,7 @@ def reading_passage_add_latex_version(video_id):
     # 另存為 reading_passage_latex.json
     new_file_name = f'{video_id}_reading_passage_latex.json'
     new_blob_name = f"{video_id}/{new_file_name}"
-    upload_file_to_gcs_with_json_string(gcs_client, bucket_name, new_blob_name, reading_passage_text)
     return new_reading_passage
@@ -1754,7 +1674,7 @@ def summary_add_markdown_version(video_id):
     # 逐字稿已存在，下载逐字稿内容
     print("summary 已存在于GCS中，轉換 Markdown 模式")
-    summary_text = download_blob_to_string(gcs_client, bucket_name, blob_name)
     summary_json = json.loads(summary_text)
     original_summary = summary_json["summary"]
     sys_content = "你是一個擅長資料分析跟影片教學的老師，user 為學生，請精讀資料文本，自行判斷資料的種類，使用 zh-TW"
@@ -1803,7 +1723,7 @@ def summary_add_markdown_version(video_id):
     # 另存為 summary_markdown.json
     new_file_name = f'{video_id}_summary_markdown.json'
     new_blob_name = f"{video_id}/{new_file_name}"
-    upload_file_to_gcs_with_json_string(gcs_client, bucket_name, new_blob_name, summary_text)
     return new_summary
@@ -1827,7 +1747,7 @@ def get_meta_data(video_id, source="gcs"):
         else:
             # meta_data已存在，下载内容
             print("meta_data已存在于GCS中")
-            meta_data_text = download_blob_to_string(gcs_client, bucket_name, blob_name)
             meta_data_json = json.loads(meta_data_text)
     # meta_data_json grade 數字轉換成文字
@@ -1865,11 +1785,11 @@ def get_ai_content(password, video_id, df_string, topic, grade, level, specific_
             # 先建立一個 ai_content_list.json
             ai_content_list = []
             ai_content_text = json.dumps(ai_content_list, ensure_ascii=False, indent=2)
-            upload_file_to_gcs_with_json_string(gcs_client, bucket_name, blob_name, ai_content_text)
             print("ai_content_list [] 已上傳到GCS")
         # 此時 ai_content_list 已存在
-        ai_content_list_string = download_blob_to_string(gcs_client, bucket_name, blob_name)
         ai_content_list = json.loads(ai_content_list_string)
         # by key 找到 ai_content （topic, grade, level, specific_feature, content_type）
         target_kvs = {
@@ -1896,7 +1816,7 @@ def get_ai_content(password, video_id, df_string, topic, grade, level, specific_
             ai_content_list.append(ai_content_json)
             ai_content_text = json.dumps(ai_content_list, ensure_ascii=False, indent=2)
-            upload_file_to_gcs_with_json_string(gcs_client, bucket_name, blob_name, ai_content_text)
             print("ai_content已上傳到GCS")
         else:
             ai_content_json = ai_content_json[-1]

         raise gr.Error("密碼錯誤")
 # ====gcs====
 def delete_blob(gcs_client, bucket_name, blob_name):
     """删除指定的 GCS 对象"""
             transcript = generate_transcription_by_whisper(video_id)
         transcript_text = json.dumps(transcript, ensure_ascii=False, indent=2)
+        GCS_SERVICE.upload_json_string(bucket_name, transcript_blob_name, transcript_text)
         is_new_transcript = True
     else:
         # 逐字稿已存在，下载逐字稿内容
         print("逐字稿已存在于GCS中")
+        transcript_text = GCS_SERVICE.download_as_string(bucket_name, transcript_blob_name)
         transcript = json.loads(transcript_text)
     # print("===確認其他衍生文件===")
             # 截图
             screenshot_path = screenshot_youtube_video(video_id, entry['start'])
             screenshot_blob_name = f"{video_id}/{video_id}_{entry['start']}.jpg"
+            img_file_id = GCS_SERVICE.upload_image_and_get_public_url(bucket_name, screenshot_blob_name, screenshot_path)
             entry['img_file_id'] = img_file_id
             print(f"截图已上传到GCS: {img_file_id}")
             is_new_transcript = True
         print(transcript)
         print("===更新逐字稿文件===")
         updated_transcript_text = json.dumps(transcript, ensure_ascii=False, indent=2)
+        GCS_SERVICE.upload_json_string(bucket_name, transcript_blob_name, updated_transcript_text)
         print("逐字稿已更新，包括截图链接")
         updated_transcript_json = json.loads(updated_transcript_text)
     else:
             reading_passage = generate_reading_passage(df_string)
             reading_passage_json = {"reading_passage": str(reading_passage)}
             reading_passage_text = json.dumps(reading_passage_json, ensure_ascii=False, indent=2)
+            GCS_SERVICE.upload_json_string(bucket_name, blob_name, reading_passage_text)
             print("reading_passage已上传到GCS")
         else:
             # reading_passage已存在，下载内容
             print("reading_passage已存在于GCS中")
+            reading_passage_text = GCS_SERVICE.download_as_string(bucket_name, blob_name)
             reading_passage_json = json.loads(reading_passage_text)
     elif source == "drive":
             mind_map = generate_mind_map(df_string)
             mind_map_json = {"mind_map": str(mind_map)}
             mind_map_text = json.dumps(mind_map_json, ensure_ascii=False, indent=2)
+            GCS_SERVICE.upload_json_string(bucket_name, blob_name, mind_map_text)
             print("mind_map已上傳到GCS")
         else:
             # mindmap已存在，下载内容
             print("mind_map已存在于GCS中")
+            mind_map_text = GCS_SERVICE.download_as_string(bucket_name, blob_name)
             mind_map_json = json.loads(mind_map_text)
     elif source == "drive":
             summary = generate_summarise(df_string, meta_data)
             summary_json = {"summary": str(summary)}
             summary_text = json.dumps(summary_json, ensure_ascii=False, indent=2)
+            GCS_SERVICE.upload_json_string(bucket_name, summary_file_blob_name, summary_text)
             print("summary已上传到GCS")
         else:
             # summary已存在，下载内容
             print("summary已存在于GCS中")
+            summary_text = GCS_SERVICE.download_as_string(bucket_name, blob_name)
             summary_json = json.loads(summary_text)
     elif source == "drive":
         if not is_questions_exists:
             questions = generate_questions(df_string)
             questions_text = json.dumps(questions, ensure_ascii=False, indent=2)
+            GCS_SERVICE.upload_json_string(bucket_name, blob_name, questions_text)
             print("questions已上傳到GCS")
         else:
             # 逐字稿已存在，下载逐字稿内容
             print("questions已存在于GCS中")
+            questions_text = GCS_SERVICE.download_as_string(bucket_name, blob_name)
             questions = json.loads(questions_text)
     elif source == "drive":
             if not is_questions_answers_exists:
                 questions_answers = generate_questions_answers(df_string)
                 questions_answers_text = json.dumps(questions_answers, ensure_ascii=False, indent=2)
+                GCS_SERVICE.upload_json_string(bucket_name, blob_name, questions_answers_text)
                 print("questions_answers已上傳到GCS")
             else:
                 # questions_answers已存在，下载内容
                 print("questions_answers已存在于GCS中")
+                questions_answers_text = GCS_SERVICE.download_as_string(bucket_name, blob_name)
                 questions_answers = json.loads(questions_answers_text)
         except:
             questions = get_questions(video_id, df_string, source)
             key_moments = generate_key_moments(formatted_simple_transcript, formatted_transcript)
             key_moments_json = {"key_moments": key_moments}
             key_moments_text = json.dumps(key_moments_json, ensure_ascii=False, indent=2)
+            GCS_SERVICE.upload_json_string(bucket_name, blob_name, key_moments_text)
             print("key_moments已上傳到GCS")
         else:
             # key_moments已存在，下载内容
             print("key_moments已存在于GCS中")
+            key_moments_text = GCS_SERVICE.download_as_string(bucket_name, blob_name)
             key_moments_json = json.loads(key_moments_text)
              # 檢查 key_moments 是否有 keywords
             print("===檢查 key_moments 是否有 keywords===")
                     has_keywords_added = True
             if has_keywords_added:
                 key_moments_text = json.dumps(key_moments_json, ensure_ascii=False, indent=2)
+                GCS_SERVICE.upload_json_string(bucket_name, blob_name, key_moments_text)
+                key_moments_text = GCS_SERVICE.download_as_string(bucket_name, blob_name)
                 key_moments_json = json.loads(key_moments_text)
     elif source == "drive":
     # 检查 file 是否存在
     is_file_exists = GCS_SERVICE.check_file_exists(bucket_name, blob_name)
     if is_file_exists:
+        content = GCS_SERVICE.download_as_string(bucket_name, blob_name)
         content_json = json.loads(content)
         if kind == "reading_passage_latex":
             content_text = content_json["reading_passage"]
     # 检查 file 是否存在
     is_file_exists = GCS_SERVICE.check_file_exists(bucket_name, blob_name)
     if is_file_exists:
+        GCS_SERVICE.delete_blob(bucket_name, blob_name)
         print(f"{file_name}已从GCS中删除")
     return gr.update(value="", interactive=False)
         print(new_content)
         reading_passage_json = {"reading_passage": str(new_content)}
         reading_passage_text = json.dumps(reading_passage_json, ensure_ascii=False, indent=2)
+        GCS_SERVICE.upload_json_string(bucket_name, blob_name, reading_passage_text)
         updated_content = new_content
     elif kind == "summary_markdown":
         summary_json = {"summary": str(new_content)}
         summary_text = json.dumps(summary_json, ensure_ascii=False, indent=2)
+        GCS_SERVICE.upload_json_string(bucket_name, blob_name, summary_text)
         updated_content = new_content
     elif kind == "mind_map":
         mind_map_json = {"mind_map": str(new_content)}
         mind_map_text = json.dumps(mind_map_json, ensure_ascii=False, indent=2)
+        GCS_SERVICE.upload_json_string(bucket_name, blob_name, mind_map_text)
         updated_content = mind_map_text
     elif kind == "key_moments":
         # from update_LLM_btn  -> new_content is a string
             key_moments_list = new_content
         key_moments_json = {"key_moments": key_moments_list}
         key_moments_text = json.dumps(key_moments_json, ensure_ascii=False, indent=2)
+        GCS_SERVICE.upload_json_string(bucket_name, blob_name, key_moments_text)
         updated_content = key_moments_text
     elif kind == "transcript":
         if isinstance(new_content, str):
         else:
             transcript_json = new_content
         transcript_text = json.dumps(transcript_json, ensure_ascii=False, indent=2)
+        GCS_SERVICE.upload_json_string(bucket_name, blob_name, transcript_text)
         updated_content = transcript_text
     elif kind == "questions":
         # from update_LLM_btn  -> new_content is a string
         else:
             questions_json = new_content
         questions_text = json.dumps(questions_json, ensure_ascii=False, indent=2)
+        GCS_SERVICE.upload_json_string(bucket_name, blob_name, questions_text)
         updated_content = questions_text
     elif kind == "questions_answers":
         # from update_LLM_btn  -> new_content is a string
         else:
             questions_answers_json = new_content
         questions_answers_text = json.dumps(questions_answers_json, ensure_ascii=False, indent=2)
+        GCS_SERVICE.upload_json_string(bucket_name, blob_name, questions_answers_text)
         updated_content = questions_answers_text
     print(f"{kind} 已更新到GCS")
     # 逐字稿已存在，下载逐字稿内容
     print("reading_passage 已存在于GCS中，轉換 Latex 模式")
+    reading_passage_text = GCS_SERVICE.download_as_string(bucket_name, blob_name)
     reading_passage_json = json.loads(reading_passage_text)
     original_reading_passage = reading_passage_json["reading_passage"]
     sys_content = "你是一個擅長資料分析跟影片教學的老師，user 為學生，請精讀資料文本，自行判斷資料的種類，使用 zh-TW"
     # 另存為 reading_passage_latex.json
     new_file_name = f'{video_id}_reading_passage_latex.json'
     new_blob_name = f"{video_id}/{new_file_name}"
+    GCS_SERVICE.upload_json_string(bucket_name, new_blob_name, reading_passage_text)
     return new_reading_passage
     # 逐字稿已存在，下载逐字稿内容
     print("summary 已存在于GCS中，轉換 Markdown 模式")
+    summary_text = GCS_SERVICE.download_as_string(bucket_name, blob_name)
     summary_json = json.loads(summary_text)
     original_summary = summary_json["summary"]
     sys_content = "你是一個擅長資料分析跟影片教學的老師，user 為學生，請精讀資料文本，自行判斷資料的種類，使用 zh-TW"
     # 另存為 summary_markdown.json
     new_file_name = f'{video_id}_summary_markdown.json'
     new_blob_name = f"{video_id}/{new_file_name}"
+    GCS_SERVICE.upload_json_string(bucket_name, new_blob_name, summary_text)
     return new_summary
         else:
             # meta_data已存在，下载内容
             print("meta_data已存在于GCS中")
+            meta_data_text = GCS_SERVICE.download_as_string(bucket_name, blob_name)
             meta_data_json = json.loads(meta_data_text)
     # meta_data_json grade 數字轉換成文字
             # 先建立一個 ai_content_list.json
             ai_content_list = []
             ai_content_text = json.dumps(ai_content_list, ensure_ascii=False, indent=2)
+            GCS_SERVICE.upload_json_string(bucket_name, blob_name, ai_content_text)
             print("ai_content_list [] 已上傳到GCS")
         # 此時 ai_content_list 已存在
+        ai_content_list_string = GCS_SERVICE.download_as_string(bucket_name, blob_name)
         ai_content_list = json.loads(ai_content_list_string)
         # by key 找到 ai_content （topic, grade, level, specific_feature, content_type）
         target_kvs = {
             ai_content_list.append(ai_content_json)
             ai_content_text = json.dumps(ai_content_list, ensure_ascii=False, indent=2)
+            GCS_SERVICE.upload_json_string(bucket_name, blob_name, ai_content_text)
             print("ai_content已上傳到GCS")
         else:
             ai_content_json = ai_content_json[-1]