Spaces:
Running
Running
get_questions
Browse files
app.py
CHANGED
@@ -518,7 +518,8 @@ def process_youtube_link(link):
|
|
518 |
TRANSCRIPTS = formatted_transcript
|
519 |
|
520 |
# 基于逐字稿生成其他所需的输出
|
521 |
-
|
|
|
522 |
formatted_transcript_json = json.dumps(formatted_transcript, ensure_ascii=False, indent=2)
|
523 |
summary_json = get_video_id_summary(video_id, formatted_simple_transcript)
|
524 |
summary = summary_json["summary"]
|
@@ -824,26 +825,47 @@ def generate_questions(df_string):
|
|
824 |
|
825 |
return questions
|
826 |
|
827 |
-
def get_questions(video_id, df_string):
|
828 |
-
|
829 |
-
|
830 |
-
|
831 |
-
|
832 |
-
|
833 |
-
|
834 |
-
|
835 |
-
|
836 |
-
|
837 |
-
|
838 |
-
|
839 |
-
|
840 |
-
|
841 |
-
|
842 |
-
|
843 |
-
|
844 |
-
|
845 |
-
|
846 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
847 |
|
848 |
q1 = questions[0] if len(questions) > 0 else ""
|
849 |
q2 = questions[1] if len(questions) > 1 else ""
|
|
|
518 |
TRANSCRIPTS = formatted_transcript
|
519 |
|
520 |
# 基于逐字稿生成其他所需的输出
|
521 |
+
source = "gcs"
|
522 |
+
questions = get_questions(video_id, formatted_simple_transcript, source)
|
523 |
formatted_transcript_json = json.dumps(formatted_transcript, ensure_ascii=False, indent=2)
|
524 |
summary_json = get_video_id_summary(video_id, formatted_simple_transcript)
|
525 |
summary = summary_json["summary"]
|
|
|
825 |
|
826 |
return questions
|
827 |
|
828 |
+
def get_questions(video_id, df_string, source="gcs"):
|
829 |
+
if source == "gcs":
|
830 |
+
# Check GCS for an existing {video_id}_questions.json
|
831 |
+
print("===get_questions on gcs===")
|
832 |
+
gcs_client = init_gcs_client(GCS_KEY)
|
833 |
+
bucket_name = 'video_ai_assistant'
|
834 |
+
file_name = f'{video_id}_questions.json'
|
835 |
+
blob_name = f"{video_id}/{file_name}"
|
836 |
+
# 检查檔案是否存在
|
837 |
+
is_questions_exists = gcs_check_file_exists(gcs_client, bucket_name, blob_name)
|
838 |
+
if not is_questions_exists:
|
839 |
+
questions = generate_questions(df_string)
|
840 |
+
questions_text = json.dumps(questions, ensure_ascii=False, indent=2)
|
841 |
+
upload_file_to_gcs_with_json_string(gcs_client, bucket_name, blob_name, questions_text)
|
842 |
+
print("questions已上傳到GCS")
|
843 |
+
else:
|
844 |
+
# Questions file already exists on GCS; download its content
|
845 |
+
print("questions已存在于GCS中")
|
846 |
+
questions_text = download_blob_to_string(gcs_client, bucket_name, blob_name)
|
847 |
+
questions = json.loads(questions_text)
|
848 |
+
|
849 |
+
elif source == "drive":
|
850 |
+
# Check Google Drive for an existing {video_id}_questions.json
|
851 |
+
print("===get_questions===")
|
852 |
+
service = init_drive_service()
|
853 |
+
parent_folder_id = '1GgI4YVs0KckwStVQkLa1NZ8IpaEMurkL'
|
854 |
+
folder_id = create_folder_if_not_exists(service, video_id, parent_folder_id)
|
855 |
+
file_name = f'{video_id}_questions.json'
|
856 |
+
|
857 |
+
# 检查檔案是否存在
|
858 |
+
exists, file_id = check_file_exists(service, folder_id, file_name)
|
859 |
+
if not exists:
|
860 |
+
questions = generate_questions(df_string)
|
861 |
+
questions_text = json.dumps(questions, ensure_ascii=False, indent=2)
|
862 |
+
upload_content_directly(service, file_name, folder_id, questions_text)
|
863 |
+
print("questions已上傳到Google Drive")
|
864 |
+
else:
|
865 |
+
# Questions file already exists on Google Drive; download its content
|
866 |
+
print("questions已存在于Google Drive中")
|
867 |
+
questions_text = download_file_as_string(service, file_id)
|
868 |
+
questions = json.loads(questions_text)
|
869 |
|
870 |
q1 = questions[0] if len(questions) > 0 else ""
|
871 |
q2 = questions[1] if len(questions) > 1 else ""
|