Spaces:

JunyiAcademy
/

vaitor2

Running

App Files Files Community

youngtsai committed on Apr 18, 2024

Commit

b1a24f7

1 Parent(s): ac3e380

def generate_questions_answers(df_string):

Browse files

Files changed (1) hide show

app.py +129 -11

app.py CHANGED Viewed

@@ -1093,6 +1093,75 @@ def generate_questions(df_string):
     return questions
 def change_questions(password, df_string):
     verify_password(password)
@@ -1545,6 +1614,16 @@ def update_LLM_content(video_id, new_content, kind):
         questions_text = json.dumps(questions_json, ensure_ascii=False, indent=2)
         upload_file_to_gcs_with_json_string(gcs_client, bucket_name, blob_name, questions_text)
         updated_content = questions_text
     print(f"{kind} 已更新到GCS")
     return gr.update(value=updated_content, interactive=False)
@@ -1570,17 +1649,21 @@ def create_LLM_content(video_id, df_string, kind):
             transcript = df_string
         formatted_simple_transcript = create_formatted_simple_transcript(transcript)
         formatted_transcript = create_formatted_transcript(video_id, transcript)
-        content = generate_key_moments(formatted_simple_transcript, formatted_transcript)
-        update_LLM_content(video_id, content, kind)
-        content = json.dumps(content, ensure_ascii=False, indent=2)
     elif kind == "transcript":
-        content = process_transcript_and_screenshots_on_gcs(video_id)
-        update_LLM_content(video_id, content, kind)
-        content = json.dumps(content, ensure_ascii=False, indent=2)
     elif kind == "questions":
-        content = generate_questions(df_string)
-        update_LLM_content(video_id, content, kind)
-        content = json.dumps(content, ensure_ascii=False, indent=2)
     return gr.update(value=content, interactive=False)
@@ -1707,8 +1790,6 @@ def summary_add_markdown_version(video_id):
     return new_summary
 # AI 生成教學素材
 def get_meta_data(video_id, source="gcs"):
@@ -2678,6 +2759,16 @@ with gr.Blocks(theme=gr.themes.Base(primary_hue=gr.themes.colors.orange, seconda
                 questions_create_button = gr.Button("重建", size="sm", variant="primary")
             with gr.Row():
                 questions_json = gr.Textbox(label="Questions", lines=40, interactive=False, show_copy_button=True)
         with gr.Tab("逐字稿"):
             simple_html_content = gr.HTML(label="Simple Transcript")
         with gr.Tab("圖文"):
@@ -2999,6 +3090,33 @@ with gr.Blocks(theme=gr.themes.Base(primary_hue=gr.themes.colors.orange, seconda
         inputs=[video_id, questions_json, questions_kind],
         outputs=[questions_json]
     )
     # 教師版
     worksheet_content_btn.click(

     return questions
+def get_questions_answers(video_id, df_string, source):
+    """Return the questions/answers JSON for a video, generating it if it is not cached.
+
+    The cache lives in the ``video_ai_assistant`` bucket under
+    ``{video_id}/{video_id}_questions_answers.json``. When that blob does not
+    exist, the content is generated from ``df_string``, wrapped in a
+    ``{"questions_answers": ...}`` object, uploaded, and returned. When it does
+    exist, the blob is downloaded and its parsed JSON is returned as-is.
+
+    Args:
+        video_id: Identifier used to build the blob path.
+        df_string: Transcript data forwarded to ``generate_questions_answers``.
+        source: Storage backend selector; only ``"gcs"`` is handled.
+
+    Returns:
+        The parsed JSON content of the questions/answers file.
+
+    Raises:
+        ValueError: If ``source`` is not ``"gcs"``.
+    """
+    if source != "gcs":
+        # The original fell through to the final `return` with
+        # `questions_answers_json` never assigned, which surfaced as an
+        # opaque UnboundLocalError. Fail with an explicit message instead.
+        raise ValueError(f"Unsupported source for questions_answers: {source!r}")
+
+    print("===get_questions_answers on gcs===")
+    gcs_client = GCS_CLIENT
+    bucket_name = 'video_ai_assistant'
+    file_name = f'{video_id}_questions_answers.json'
+    blob_name = f"{video_id}/{file_name}"
+    # Check whether the file already exists.
+    is_questions_answers_exists = GCS_SERVICE.check_file_exists(bucket_name, blob_name)
+    if not is_questions_answers_exists:
+        questions_answers = generate_questions_answers(df_string)
+        questions_answers_json = {"questions_answers": questions_answers}
+        questions_answers_text = json.dumps(questions_answers_json, ensure_ascii=False, indent=2)
+        upload_file_to_gcs_with_json_string(gcs_client, bucket_name, blob_name, questions_answers_text)
+        print("questions_answers已上傳到GCS")
+    else:
+        # The file already exists; download its content.
+        # NOTE(review): other writers may store a bare list in this blob rather
+        # than the {"questions_answers": ...} wrapper used above — confirm the
+        # shape callers should expect.
+        print("questions_answers已存在于GCS中")
+        questions_answers_text = download_blob_to_string(gcs_client, bucket_name, blob_name)
+        questions_answers_json = json.loads(questions_answers_text)
+    return questions_answers_json
+def generate_questions_answers(df_string):
+    """Ask the OpenAI chat model for question/answer pairs based on transcript text.
+
+    Args:
+        df_string: Either a JSON string or an already-parsed iterable of
+            entries; each entry is read via ``entry["text"]``.
+
+    Returns:
+        The value stored under the ``"questions_answers"`` key of the model's
+        JSON reply. The prompt asks for a list shaped like
+        ``[{"question": "...", "answer": "..."}, ...]``.
+
+    Raises:
+        KeyError: If the reply JSON has no ``"questions_answers"`` key.
+    """
+    transcript_entries = json.loads(df_string) if isinstance(df_string, str) else df_string
+    # Each text fragment is followed by a full-width comma, the last one included.
+    content_text = "".join(entry["text"] + "，" for entry in transcript_entries)
+
+    # Expected JSON format: [{"question": "...", "answer": "..."}, ...]
+    sys_content = "你是一個擅長資料分析跟影片教學的老師，user 為學生，請精讀資料文本，自行判斷資料的種類，並用既有資料為本質猜測用戶可能會問的問題，使用 zh-TW"
+    user_content = f"""
+        請根據 {content_text} 生成三個問題，主要與學科有關，不要問跟情節故事相關的問題
+        並用 JSON 格式返回 questions_answers: [{{question: q1的敘述text, answer: q1的答案text}}, ...]
+        k-v pair 的 key 是 question, value 是 answer
+    """
+    messages = [
+        {"role": "system", "content": sys_content},
+        {"role": "user", "content": user_content},
+    ]
+
+    banner = "=====messages====="
+    print(banner)
+    print(messages)
+    print(banner)
+
+    # response_format of type json_object requests a JSON reply from the model.
+    reply = OPEN_AI_CLIENT.chat.completions.create(
+        model="gpt-4-turbo",
+        messages=messages,
+        max_tokens=4000,
+        response_format={"type": "json_object"},
+    )
+    parsed_reply = json.loads(reply.choices[0].message.content)
+    questions_answers = parsed_reply["questions_answers"]
+
+    banner = "=====json_response====="
+    print(banner)
+    print(questions_answers)
+    print(banner)
+    return questions_answers
 def change_questions(password, df_string):
     verify_password(password)
         questions_text = json.dumps(questions_json, ensure_ascii=False, indent=2)
         upload_file_to_gcs_with_json_string(gcs_client, bucket_name, blob_name, questions_text)
         updated_content = questions_text
+    elif kind == "questions_answers":
+        # from update_LLM_btn  -> new_content is a string
+        # create_LLM_content -> new_content is a list
+        if isinstance(new_content, str):
+            questions_answers_json = json.loads(new_content)
+        else:
+            questions_answers_json = new_content
+        questions_answers_text = json.dumps(questions_answers_json, ensure_ascii=False, indent=2)
+        upload_file_to_gcs_with_json_string(gcs_client, bucket_name, blob_name, questions_answers_text)
+        updated_content = questions_answers_text
     print(f"{kind} 已更新到GCS")
     return gr.update(value=updated_content, interactive=False)
             transcript = df_string
         formatted_simple_transcript = create_formatted_simple_transcript(transcript)
         formatted_transcript = create_formatted_transcript(video_id, transcript)
+        gen_content = generate_key_moments(formatted_simple_transcript, formatted_transcript)
+        update_LLM_content(video_id, gen_content, kind)
+        content = json.dumps(gen_content, ensure_ascii=False, indent=2)
     elif kind == "transcript":
+        gen_content = process_transcript_and_screenshots_on_gcs(video_id)
+        update_LLM_content(video_id, gen_content, kind)
+        content = json.dumps(gen_content, ensure_ascii=False, indent=2)
     elif kind == "questions":
+        gen_content = generate_questions(df_string)
+        update_LLM_content(video_id, gen_content, kind)
+        content = json.dumps(gen_content, ensure_ascii=False, indent=2)
+    elif kind == "questions_answers":
+        gen_content = generate_questions_answers(df_string)
+        update_LLM_content(video_id, gen_content, kind)
+        content = json.dumps(gen_content, ensure_ascii=False, indent=2)
     return gr.update(value=content, interactive=False)
     return new_summary
 # AI 生成教學素材
 def get_meta_data(video_id, source="gcs"):
                 questions_create_button = gr.Button("重建", size="sm", variant="primary")
             with gr.Row():
                 questions_json = gr.Textbox(label="Questions", lines=40, interactive=False, show_copy_button=True)
+        with gr.Tab("問題答案本文"):
+            with gr.Row() as questions_answers_admin:
+                questions_answers_kind = gr.Textbox(value="questions_answers", show_label=False)
+                questions_answers_get_button = gr.Button("取得", size="sm", variant="primary")
+                questions_answers_edit_button = gr.Button("編輯", size="sm", variant="primary")
+                questions_answers_update_button = gr.Button("儲存", size="sm", variant="primary")
+                questions_answers_delete_button = gr.Button("刪除", size="sm", variant="primary")
+                questions_answers_create_button = gr.Button("重建", size="sm", variant="primary")
+            with gr.Row():
+                questions_answers_json = gr.Textbox(label="Questions Answers", lines=40, interactive=False, show_copy_button=True)
         with gr.Tab("逐字稿"):
             simple_html_content = gr.HTML(label="Simple Transcript")
         with gr.Tab("圖文"):
         inputs=[video_id, questions_json, questions_kind],
         outputs=[questions_json]
     )
+    # questions_answers event
+    questions_answers_get_button.click(
+        get_LLM_content,
+        inputs=[video_id, questions_answers_kind],
+        outputs=[questions_answers_json]
+    )
+    questions_answers_create_button.click(
+        create_LLM_content,
+        inputs=[video_id, df_string_output, questions_answers_kind],
+        outputs=[questions_answers_json]
+    )
+    questions_answers_delete_button.click(
+        delete_LLM_content,
+        inputs=[video_id, questions_answers_kind],
+        outputs=[questions_answers_json]
+    )
+    questions_answers_edit_button.click(
+        enable_edit_mode,
+        inputs=[],
+        outputs=[questions_answers_json]
+    )
+    questions_answers_update_button.click(
+        update_LLM_content,
+        inputs=[video_id, questions_answers_json, questions_answers_kind],
+        outputs=[questions_answers_json]
+    )
     # 教師版
     worksheet_content_btn.click(