youngtsai committed
Commit 68393b8 · 1 Parent(s): 910748f

GCS_SERVICE refactor
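This commit replaces the module-level GCS helper functions in app.py with calls on a shared GCS_SERVICE object. The service class itself is not part of this diff; what follows is a minimal sketch of what such a wrapper might look like, inferred only from the methods the new code calls (check_file_exists, upload_json_string, download_as_string, upload_image_and_get_public_url, delete_blob). The class name GCSService, its constructor, and the module-level instance are assumptions, not code from this commit.

    # Hypothetical sketch of the GCS_SERVICE wrapper; the real class lives elsewhere in the repo.
    from google.cloud import storage

    class GCSService:
        def __init__(self, credentials=None):
            # One storage client shared by all methods, instead of passing gcs_client around.
            self.client = storage.Client(credentials=credentials)

        def check_file_exists(self, bucket_name, blob_name):
            # True if gs://{bucket_name}/{blob_name} exists.
            return self.client.bucket(bucket_name).blob(blob_name).exists()

        def upload_json_string(self, bucket_name, blob_name, json_string):
            # Upload a JSON string as the blob's content.
            self.client.bucket(bucket_name).blob(blob_name).upload_from_string(json_string)

        def download_as_string(self, bucket_name, blob_name):
            # Download the blob's content as text.
            return self.client.bucket(bucket_name).blob(blob_name).download_as_text()

        def upload_image_and_get_public_url(self, bucket_name, blob_name, file_path):
            # Upload a local image, make it public, and return its public URL.
            blob = self.client.bucket(bucket_name).blob(blob_name)
            blob.upload_from_filename(file_path)
            blob.make_public()
            return blob.public_url

        def delete_blob(self, bucket_name, blob_name):
            # Delete the blob.
            self.client.bucket(bucket_name).blob(blob_name).delete()

    GCS_SERVICE = GCSService()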

Files changed (1): app.py (+36, -116)
app.py CHANGED
@@ -93,87 +93,6 @@ def verify_password(password):
         raise gr.Error("密碼錯誤")
 
 # ====gcs====
-def gcs_check_file_exists(gcs_client, bucket_name, file_name):
-    """
-    检查 GCS 存储桶中是否存在指定的文件
-    file_name 格式:{folder_name}/{file_name}
-    """
-    bucket = gcs_client.bucket(bucket_name)
-    blob = bucket.blob(file_name)
-    return blob.exists()
-
-def upload_file_to_gcs(gcs_client, bucket_name, destination_blob_name, file_path):
-    """上传文件到指定的 GCS 存储桶"""
-    bucket = gcs_client.bucket(bucket_name)
-    blob = bucket.blob(destination_blob_name)
-    blob.upload_from_filename(file_path)
-    print(f"File {file_path} uploaded to {destination_blob_name} in GCS.")
-
-def upload_file_to_gcs_with_json_string(gcs_client, bucket_name, destination_blob_name, json_string):
-    """上传字符串到指定的 GCS 存储桶"""
-    bucket = gcs_client.bucket(bucket_name)
-    blob = bucket.blob(destination_blob_name)
-    blob.upload_from_string(json_string)
-    print(f"JSON string uploaded to {destination_blob_name} in GCS.")
-
-def download_blob_to_string(gcs_client, bucket_name, source_blob_name):
-    """从 GCS 下载文件内容到字符串"""
-    bucket = gcs_client.bucket(bucket_name)
-    blob = bucket.blob(source_blob_name)
-    return blob.download_as_text()
-
-def make_blob_public(gcs_client, bucket_name, blob_name):
-    """将指定的 GCS 对象设置为公共可读"""
-    bucket = gcs_client.bucket(bucket_name)
-    blob = bucket.blob(blob_name)
-    blob.make_public()
-    print(f"Blob {blob_name} is now publicly accessible at {blob.public_url}")
-
-def get_blob_public_url(gcs_client, bucket_name, blob_name):
-    """获取指定 GCS 对象的公开 URL"""
-    bucket = gcs_client.bucket(bucket_name)
-    blob = bucket.blob(blob_name)
-    return blob.public_url
-
-def upload_img_and_get_public_url(gcs_client, bucket_name, file_name, file_path):
-    """上传图片到 GCS 并获取其公开 URL"""
-    # 上传图片
-    upload_file_to_gcs(gcs_client, bucket_name, file_name, file_path)
-    # 将上传的图片设置为公开
-    make_blob_public(gcs_client, bucket_name, file_name)
-    # 获取图片的公开 URL
-    public_url = get_blob_public_url(gcs_client, bucket_name, file_name)
-    print(f"Public URL for the uploaded image: {public_url}")
-    return public_url
-
-def copy_all_files_from_drive_to_gcs(drive_service, gcs_client, drive_folder_id, bucket_name, gcs_folder_name):
-    # Get all files from the folder
-    query = f"'{drive_folder_id}' in parents and trashed = false"
-    response = drive_service.files().list(q=query).execute()
-    files = response.get('files', [])
-    for file in files:
-        # Copy each file to GCS
-        file_id = file['id']
-        file_name = file['name']
-        gcs_destination_path = f"{gcs_folder_name}/{file_name}"
-        copy_file_from_drive_to_gcs(drive_service, gcs_client, file_id, bucket_name, gcs_destination_path)
-
-def copy_file_from_drive_to_gcs(drive_service, gcs_client, file_id, bucket_name, gcs_destination_path):
-    # Download file content from Drive
-    request = drive_service.files().get_media(fileId=file_id)
-    fh = io.BytesIO()
-    downloader = MediaIoBaseDownload(fh, request)
-    done = False
-    while not done:
-        status, done = downloader.next_chunk()
-    fh.seek(0)
-    file_content = fh.getvalue()
-
-    # Upload file content to GCS
-    bucket = gcs_client.bucket(bucket_name)
-    blob = bucket.blob(gcs_destination_path)
-    blob.upload_from_string(file_content)
-    print(f"File {file_id} copied to GCS at {gcs_destination_path}.")
 
 def delete_blob(gcs_client, bucket_name, blob_name):
     """删除指定的 GCS 对象"""
@@ -483,12 +402,13 @@ def process_transcript_and_screenshots_on_gcs(video_id):
         transcript = generate_transcription_by_whisper(video_id)
 
         transcript_text = json.dumps(transcript, ensure_ascii=False, indent=2)
-        upload_file_to_gcs_with_json_string(gcs_client, bucket_name, transcript_blob_name, transcript_text)
+        GCS_SERVICE.upload_json_string(bucket_name, transcript_blob_name, transcript_text)
+
         is_new_transcript = True
     else:
         # 逐字稿已存在,下载逐字稿内容
         print("逐字稿已存在于GCS中")
-        transcript_text = download_blob_to_string(gcs_client, bucket_name, transcript_blob_name)
+        transcript_text = GCS_SERVICE.download_as_string(bucket_name, transcript_blob_name)
         transcript = json.loads(transcript_text)
 
     # print("===確認其他衍生文件===")
@@ -517,7 +437,7 @@ def process_transcript_and_screenshots_on_gcs(video_id):
             # 截图
             screenshot_path = screenshot_youtube_video(video_id, entry['start'])
             screenshot_blob_name = f"{video_id}/{video_id}_{entry['start']}.jpg"
-            img_file_id = upload_img_and_get_public_url(gcs_client, bucket_name, screenshot_blob_name, screenshot_path)
+            img_file_id = GCS_SERVICE.upload_image_and_get_public_url(bucket_name, screenshot_blob_name, screenshot_path)
             entry['img_file_id'] = img_file_id
             print(f"截图已上传到GCS: {img_file_id}")
             is_new_transcript = True
@@ -529,7 +449,7 @@ def process_transcript_and_screenshots_on_gcs(video_id):
         print(transcript)
         print("===更新逐字稿文件===")
         updated_transcript_text = json.dumps(transcript, ensure_ascii=False, indent=2)
-        upload_file_to_gcs_with_json_string(gcs_client, bucket_name, transcript_blob_name, updated_transcript_text)
+        GCS_SERVICE.upload_json_string(bucket_name, transcript_blob_name, updated_transcript_text)
         print("逐字稿已更新,包括截图链接")
         updated_transcript_json = json.loads(updated_transcript_text)
     else:
@@ -723,12 +643,12 @@ def get_reading_passage(video_id, df_string, source):
             reading_passage = generate_reading_passage(df_string)
             reading_passage_json = {"reading_passage": str(reading_passage)}
             reading_passage_text = json.dumps(reading_passage_json, ensure_ascii=False, indent=2)
-            upload_file_to_gcs_with_json_string(gcs_client, bucket_name, blob_name, reading_passage_text)
+            GCS_SERVICE.upload_json_string(bucket_name, blob_name, reading_passage_text)
             print("reading_passage已上传到GCS")
         else:
             # reading_passage已存在,下载内容
             print("reading_passage已存在于GCS中")
-            reading_passage_text = download_blob_to_string(gcs_client, bucket_name, blob_name)
+            reading_passage_text = GCS_SERVICE.download_as_string(bucket_name, blob_name)
             reading_passage_json = json.loads(reading_passage_text)
 
     elif source == "drive":
@@ -805,12 +725,12 @@ def get_mind_map(video_id, df_string, source):
             mind_map = generate_mind_map(df_string)
             mind_map_json = {"mind_map": str(mind_map)}
             mind_map_text = json.dumps(mind_map_json, ensure_ascii=False, indent=2)
-            upload_file_to_gcs_with_json_string(gcs_client, bucket_name, blob_name, mind_map_text)
+            GCS_SERVICE.upload_json_string(bucket_name, blob_name, mind_map_text)
             print("mind_map已上傳到GCS")
         else:
             # mindmap已存在,下载内容
             print("mind_map已存在于GCS中")
-            mind_map_text = download_blob_to_string(gcs_client, bucket_name, blob_name)
+            mind_map_text = GCS_SERVICE.download_as_string(bucket_name, blob_name)
             mind_map_json = json.loads(mind_map_text)
 
     elif source == "drive":
@@ -889,12 +809,12 @@ def get_video_id_summary(video_id, df_string, source):
             summary = generate_summarise(df_string, meta_data)
             summary_json = {"summary": str(summary)}
             summary_text = json.dumps(summary_json, ensure_ascii=False, indent=2)
-            upload_file_to_gcs_with_json_string(gcs_client, bucket_name, summary_file_blob_name, summary_text)
+            GCS_SERVICE.upload_json_string(bucket_name, summary_file_blob_name, summary_text)
             print("summary已上传到GCS")
         else:
             # summary已存在,下载内容
             print("summary已存在于GCS中")
-            summary_text = download_blob_to_string(gcs_client, bucket_name, summary_file_blob_name)
+            summary_text = GCS_SERVICE.download_as_string(bucket_name, blob_name)
             summary_json = json.loads(summary_text)
 
     elif source == "drive":
@@ -1012,12 +932,12 @@ def get_questions(video_id, df_string, source="gcs"):
         if not is_questions_exists:
             questions = generate_questions(df_string)
             questions_text = json.dumps(questions, ensure_ascii=False, indent=2)
-            upload_file_to_gcs_with_json_string(gcs_client, bucket_name, blob_name, questions_text)
+            GCS_SERVICE.upload_json_string(bucket_name, blob_name, questions_text)
             print("questions已上傳到GCS")
         else:
             # 逐字稿已存在,下载逐字稿内容
             print("questions已存在于GCS中")
-            questions_text = download_blob_to_string(gcs_client, bucket_name, blob_name)
+            questions_text = GCS_SERVICE.download_as_string(bucket_name, blob_name)
             questions = json.loads(questions_text)
 
     elif source == "drive":
@@ -1103,12 +1023,12 @@ def get_questions_answers(video_id, df_string, source="gcs"):
             if not is_questions_answers_exists:
                 questions_answers = generate_questions_answers(df_string)
                 questions_answers_text = json.dumps(questions_answers, ensure_ascii=False, indent=2)
-                upload_file_to_gcs_with_json_string(gcs_client, bucket_name, blob_name, questions_answers_text)
+                GCS_SERVICE.upload_json_string(bucket_name, blob_name, questions_answers_text)
                 print("questions_answers已上傳到GCS")
             else:
                 # questions_answers已存在,下载内容
                 print("questions_answers已存在于GCS中")
-                questions_answers_text = download_blob_to_string(gcs_client, bucket_name, blob_name)
+                questions_answers_text = GCS_SERVICE.download_as_string(bucket_name, blob_name)
                 questions_answers = json.loads(questions_answers_text)
         except:
             questions = get_questions(video_id, df_string, source)
@@ -1202,12 +1122,12 @@ def get_key_moments(video_id, formatted_simple_transcript, formatted_transcript,
             key_moments = generate_key_moments(formatted_simple_transcript, formatted_transcript)
             key_moments_json = {"key_moments": key_moments}
             key_moments_text = json.dumps(key_moments_json, ensure_ascii=False, indent=2)
-            upload_file_to_gcs_with_json_string(gcs_client, bucket_name, blob_name, key_moments_text)
+            GCS_SERVICE.upload_json_string(bucket_name, blob_name, key_moments_text)
             print("key_moments已上傳到GCS")
         else:
             # key_moments已存在,下载内容
             print("key_moments已存在于GCS中")
-            key_moments_text = download_blob_to_string(gcs_client, bucket_name, blob_name)
+            key_moments_text = GCS_SERVICE.download_as_string(bucket_name, blob_name)
             key_moments_json = json.loads(key_moments_text)
             # 檢查 key_moments 是否有 keywords
             print("===檢查 key_moments 是否有 keywords===")
@@ -1222,8 +1142,8 @@ def get_key_moments(video_id, formatted_simple_transcript, formatted_transcript,
                     has_keywords_added = True
             if has_keywords_added:
                 key_moments_text = json.dumps(key_moments_json, ensure_ascii=False, indent=2)
-                upload_file_to_gcs_with_json_string(gcs_client, bucket_name, blob_name, key_moments_text)
-                key_moments_text = download_blob_to_string(gcs_client, bucket_name, blob_name)
+                GCS_SERVICE.upload_json_string(bucket_name, blob_name, key_moments_text)
+                key_moments_text = GCS_SERVICE.download_as_string(bucket_name, blob_name)
                 key_moments_json = json.loads(key_moments_text)
 
     elif source == "drive":
@@ -1545,7 +1465,7 @@ def get_LLM_content(video_id, kind):
     # 检查 file 是否存在
     is_file_exists = GCS_SERVICE.check_file_exists(bucket_name, blob_name)
     if is_file_exists:
-        content = download_blob_to_string(gcs_client, bucket_name, blob_name)
+        content = GCS_SERVICE.download_as_string(bucket_name, blob_name)
         content_json = json.loads(content)
         if kind == "reading_passage_latex":
             content_text = content_json["reading_passage"]
@@ -1569,7 +1489,7 @@ def delete_LLM_content(video_id, kind):
     # 检查 file 是否存在
     is_file_exists = GCS_SERVICE.check_file_exists(bucket_name, blob_name)
     if is_file_exists:
-        delete_blob(gcs_client, bucket_name, blob_name)
+        GCS_SERVICE.delete_blob(bucket_name, blob_name)
         print(f"{file_name}已从GCS中删除")
     return gr.update(value="", interactive=False)
 
@@ -1585,17 +1505,17 @@ def update_LLM_content(video_id, new_content, kind):
         print(new_content)
         reading_passage_json = {"reading_passage": str(new_content)}
         reading_passage_text = json.dumps(reading_passage_json, ensure_ascii=False, indent=2)
-        upload_file_to_gcs_with_json_string(gcs_client, bucket_name, blob_name, reading_passage_text)
+        GCS_SERVICE.upload_json_string(bucket_name, blob_name, reading_passage_text)
         updated_content = new_content
     elif kind == "summary_markdown":
         summary_json = {"summary": str(new_content)}
         summary_text = json.dumps(summary_json, ensure_ascii=False, indent=2)
-        upload_file_to_gcs_with_json_string(gcs_client, bucket_name, blob_name, summary_text)
+        GCS_SERVICE.upload_json_string(bucket_name, blob_name, summary_text)
         updated_content = new_content
     elif kind == "mind_map":
         mind_map_json = {"mind_map": str(new_content)}
         mind_map_text = json.dumps(mind_map_json, ensure_ascii=False, indent=2)
-        upload_file_to_gcs_with_json_string(gcs_client, bucket_name, blob_name, mind_map_text)
+        GCS_SERVICE.upload_json_string(bucket_name, blob_name, mind_map_text)
         updated_content = mind_map_text
     elif kind == "key_moments":
         # from update_LLM_btn -> new_content is a string
@@ -1606,7 +1526,7 @@ def update_LLM_content(video_id, new_content, kind):
             key_moments_list = new_content
         key_moments_json = {"key_moments": key_moments_list}
         key_moments_text = json.dumps(key_moments_json, ensure_ascii=False, indent=2)
-        upload_file_to_gcs_with_json_string(gcs_client, bucket_name, blob_name, key_moments_text)
+        GCS_SERVICE.upload_json_string(bucket_name, blob_name, key_moments_text)
         updated_content = key_moments_text
     elif kind == "transcript":
         if isinstance(new_content, str):
@@ -1614,7 +1534,7 @@ def update_LLM_content(video_id, new_content, kind):
         else:
             transcript_json = new_content
         transcript_text = json.dumps(transcript_json, ensure_ascii=False, indent=2)
-        upload_file_to_gcs_with_json_string(gcs_client, bucket_name, blob_name, transcript_text)
+        GCS_SERVICE.upload_json_string(bucket_name, blob_name, transcript_text)
         updated_content = transcript_text
     elif kind == "questions":
         # from update_LLM_btn -> new_content is a string
@@ -1624,7 +1544,7 @@ def update_LLM_content(video_id, new_content, kind):
         else:
             questions_json = new_content
         questions_text = json.dumps(questions_json, ensure_ascii=False, indent=2)
-        upload_file_to_gcs_with_json_string(gcs_client, bucket_name, blob_name, questions_text)
+        GCS_SERVICE.upload_json_string(bucket_name, blob_name, questions_text)
         updated_content = questions_text
     elif kind == "questions_answers":
         # from update_LLM_btn -> new_content is a string
@@ -1634,7 +1554,7 @@ def update_LLM_content(video_id, new_content, kind):
         else:
             questions_answers_json = new_content
         questions_answers_text = json.dumps(questions_answers_json, ensure_ascii=False, indent=2)
-        upload_file_to_gcs_with_json_string(gcs_client, bucket_name, blob_name, questions_answers_text)
+        GCS_SERVICE.upload_json_string(bucket_name, blob_name, questions_answers_text)
         updated_content = questions_answers_text
 
     print(f"{kind} 已更新到GCS")
@@ -1701,7 +1621,7 @@ def reading_passage_add_latex_version(video_id):
 
     # 逐字稿已存在,下载逐字稿内容
     print("reading_passage 已存在于GCS中,轉換 Latex 模式")
-    reading_passage_text = download_blob_to_string(gcs_client, bucket_name, blob_name)
+    reading_passage_text = GCS_SERVICE.download_as_string(bucket_name, blob_name)
     reading_passage_json = json.loads(reading_passage_text)
     original_reading_passage = reading_passage_json["reading_passage"]
     sys_content = "你是一個擅長資料分析跟影片教學的老師,user 為學生,請精讀資料文本,自行判斷資料的種類,使用 zh-TW"
@@ -1734,7 +1654,7 @@ def reading_passage_add_latex_version(video_id):
     # 另存為 reading_passage_latex.json
     new_file_name = f'{video_id}_reading_passage_latex.json'
     new_blob_name = f"{video_id}/{new_file_name}"
-    upload_file_to_gcs_with_json_string(gcs_client, bucket_name, new_blob_name, reading_passage_text)
+    GCS_SERVICE.upload_json_string(bucket_name, new_blob_name, reading_passage_text)
 
     return new_reading_passage
 
@@ -1754,7 +1674,7 @@ def summary_add_markdown_version(video_id):
 
     # 逐字稿已存在,下载逐字稿内容
     print("summary 已存在于GCS中,轉換 Markdown 模式")
-    summary_text = download_blob_to_string(gcs_client, bucket_name, blob_name)
+    summary_text = GCS_SERVICE.download_as_string(bucket_name, blob_name)
     summary_json = json.loads(summary_text)
     original_summary = summary_json["summary"]
     sys_content = "你是一個擅長資料分析跟影片教學的老師,user 為學生,請精讀資料文本,自行判斷資料的種類,使用 zh-TW"
@@ -1803,7 +1723,7 @@ def summary_add_markdown_version(video_id):
     # 另存為 summary_markdown.json
     new_file_name = f'{video_id}_summary_markdown.json'
     new_blob_name = f"{video_id}/{new_file_name}"
-    upload_file_to_gcs_with_json_string(gcs_client, bucket_name, new_blob_name, summary_text)
+    GCS_SERVICE.upload_json_string(bucket_name, new_blob_name, summary_text)
 
     return new_summary
 
@@ -1827,7 +1747,7 @@ def get_meta_data(video_id, source="gcs"):
     else:
         # meta_data已存在,下载内容
         print("meta_data已存在于GCS中")
-        meta_data_text = download_blob_to_string(gcs_client, bucket_name, blob_name)
+        meta_data_text = GCS_SERVICE.download_as_string(bucket_name, blob_name)
         meta_data_json = json.loads(meta_data_text)
 
     # meta_data_json grade 數字轉換成文字
@@ -1865,11 +1785,11 @@ def get_ai_content(password, video_id, df_string, topic, grade, level, specific_
         # 先建立一個 ai_content_list.json
         ai_content_list = []
         ai_content_text = json.dumps(ai_content_list, ensure_ascii=False, indent=2)
-        upload_file_to_gcs_with_json_string(gcs_client, bucket_name, blob_name, ai_content_text)
+        GCS_SERVICE.upload_json_string(bucket_name, blob_name, ai_content_text)
         print("ai_content_list [] 已上傳到GCS")
 
     # 此時 ai_content_list 已存在
-    ai_content_list_string = download_blob_to_string(gcs_client, bucket_name, blob_name)
+    ai_content_list_string = GCS_SERVICE.download_as_string(bucket_name, blob_name)
     ai_content_list = json.loads(ai_content_list_string)
     # by key 找到 ai_content (topic, grade, level, specific_feature, content_type)
     target_kvs = {
@@ -1896,7 +1816,7 @@ def get_ai_content(password, video_id, df_string, topic, grade, level, specific_
 
         ai_content_list.append(ai_content_json)
         ai_content_text = json.dumps(ai_content_list, ensure_ascii=False, indent=2)
-        upload_file_to_gcs_with_json_string(gcs_client, bucket_name, blob_name, ai_content_text)
+        GCS_SERVICE.upload_json_string(bucket_name, blob_name, ai_content_text)
         print("ai_content已上傳到GCS")
     else:
         ai_content_json = ai_content_json[-1]
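A representative call-site change from the hunks above, assuming GCS_SERVICE is a module-level instance of a wrapper like the sketch near the top of this page (bucket_name, blob_name, and text stand for the values already used in app.py):

    # before: the google-cloud-storage client is threaded through free functions
    upload_file_to_gcs_with_json_string(gcs_client, bucket_name, blob_name, text)
    content = download_blob_to_string(gcs_client, bucket_name, blob_name)

    # after: the service owns the client, so call sites drop the gcs_client argument
    GCS_SERVICE.upload_json_string(bucket_name, blob_name, text)
    content = GCS_SERVICE.download_as_string(bucket_name, blob_name)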
 