Spaces:
Running
Running
def generate_questions_answers(df_string):
Browse files
app.py
CHANGED
@@ -1093,6 +1093,75 @@ def generate_questions(df_string):
|
|
1093 |
|
1094 |
return questions
|
1095 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1096 |
def change_questions(password, df_string):
|
1097 |
verify_password(password)
|
1098 |
|
@@ -1545,6 +1614,16 @@ def update_LLM_content(video_id, new_content, kind):
|
|
1545 |
questions_text = json.dumps(questions_json, ensure_ascii=False, indent=2)
|
1546 |
upload_file_to_gcs_with_json_string(gcs_client, bucket_name, blob_name, questions_text)
|
1547 |
updated_content = questions_text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1548 |
|
1549 |
print(f"{kind} 已更新到GCS")
|
1550 |
return gr.update(value=updated_content, interactive=False)
|
@@ -1570,17 +1649,21 @@ def create_LLM_content(video_id, df_string, kind):
|
|
1570 |
transcript = df_string
|
1571 |
formatted_simple_transcript = create_formatted_simple_transcript(transcript)
|
1572 |
formatted_transcript = create_formatted_transcript(video_id, transcript)
|
1573 |
-
|
1574 |
-
update_LLM_content(video_id,
|
1575 |
-
content = json.dumps(
|
1576 |
elif kind == "transcript":
|
1577 |
-
|
1578 |
-
update_LLM_content(video_id,
|
1579 |
-
content = json.dumps(
|
1580 |
elif kind == "questions":
|
1581 |
-
|
1582 |
-
update_LLM_content(video_id,
|
1583 |
-
content = json.dumps(
|
|
|
|
|
|
|
|
|
1584 |
|
1585 |
return gr.update(value=content, interactive=False)
|
1586 |
|
@@ -1707,8 +1790,6 @@ def summary_add_markdown_version(video_id):
|
|
1707 |
|
1708 |
return new_summary
|
1709 |
|
1710 |
-
|
1711 |
-
|
1712 |
|
1713 |
# AI 生成教學素材
|
1714 |
def get_meta_data(video_id, source="gcs"):
|
@@ -2678,6 +2759,16 @@ with gr.Blocks(theme=gr.themes.Base(primary_hue=gr.themes.colors.orange, seconda
|
|
2678 |
questions_create_button = gr.Button("重建", size="sm", variant="primary")
|
2679 |
with gr.Row():
|
2680 |
questions_json = gr.Textbox(label="Questions", lines=40, interactive=False, show_copy_button=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2681 |
with gr.Tab("逐字稿"):
|
2682 |
simple_html_content = gr.HTML(label="Simple Transcript")
|
2683 |
with gr.Tab("圖文"):
|
@@ -2999,6 +3090,33 @@ with gr.Blocks(theme=gr.themes.Base(primary_hue=gr.themes.colors.orange, seconda
|
|
2999 |
inputs=[video_id, questions_json, questions_kind],
|
3000 |
outputs=[questions_json]
|
3001 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3002 |
|
3003 |
# 教師版
|
3004 |
worksheet_content_btn.click(
|
|
|
1093 |
|
1094 |
return questions
|
1095 |
|
1096 |
+
def get_questions_answers(video_id, df_string, source):
    """Return the question/answer JSON for a video, generating it on a cache miss.

    The result is cached in the ``video_ai_assistant`` GCS bucket under
    ``{video_id}/{video_id}_questions_answers.json``. When that blob does not
    exist, the pairs are produced by ``generate_questions_answers``, wrapped
    as ``{"questions_answers": ...}``, uploaded, and returned. When it does
    exist, the stored JSON is downloaded and decoded.

    Args:
        video_id: Identifier used to build the blob path.
        df_string: Transcript data forwarded unchanged to
            ``generate_questions_answers`` on a cache miss.
        source: Storage backend selector. Only ``"gcs"`` is implemented.

    Returns:
        The decoded JSON object: on a cache miss a dict with the single key
        ``"questions_answers"``; on a cache hit whatever the stored blob
        decodes to.

    Raises:
        ValueError: if ``source`` is anything other than ``"gcs"``.
    """
    if source != "gcs":
        # Only the GCS backend assigns a result. Fail loudly with a clear
        # message instead of reaching the return with nothing bound.
        raise ValueError(f"Unsupported source for questions_answers: {source!r}")

    print("===get_questions_answers on gcs===")
    gcs_client = GCS_CLIENT
    bucket_name = 'video_ai_assistant'
    file_name = f'{video_id}_questions_answers.json'
    blob_name = f"{video_id}/{file_name}"

    # Check whether the cached file already exists.
    if GCS_SERVICE.check_file_exists(bucket_name, blob_name):
        # Cache hit: download the stored content and decode it.
        print("questions_answers已存在于GCS中")
        questions_answers_text = download_blob_to_string(gcs_client, bucket_name, blob_name)
        return json.loads(questions_answers_text)

    # Cache miss: generate, persist, then return the freshly built object.
    questions_answers = generate_questions_answers(df_string)
    questions_answers_json = {"questions_answers": questions_answers}
    questions_answers_text = json.dumps(questions_answers_json, ensure_ascii=False, indent=2)
    upload_file_to_gcs_with_json_string(gcs_client, bucket_name, blob_name, questions_answers_text)
    print("questions_answers已上傳到GCS")
    return questions_answers_json
|
1118 |
+
|
1119 |
+
|
1120 |
+
def generate_questions_answers(df_string):
    """Ask the OpenAI chat model for question/answer pairs about a transcript.

    Args:
        df_string: Either a JSON string or an already-decoded iterable of
            entries. Each entry is indexed with ``entry["text"]``.

    Returns:
        The value stored under the ``"questions_answers"`` key of the model's
        JSON reply. The prompt requests a list of question/answer objects,
        but the reply's shape is not validated here.

    Raises:
        KeyError: if the decoded reply has no ``"questions_answers"`` key.
    """
    # Use OpenAI to generate questions based on the uploaded data.
    entries = json.loads(df_string) if isinstance(df_string, str) else df_string

    # Each entry's text is followed by a comma, including the last one.
    transcript_text = "".join(entry["text"] + "," for entry in entries)

    # JSON FORMAT: [{"question": "問題", "answer": "答案"}, ...]
    system_prompt = "你是一個擅長資料分析跟影片教學的老師，user 為學生，請精讀資料文本，自行判斷資料的種類，並用既有資料為本質猜測用戶可能會問的問題，使用 zh-TW"
    user_prompt = f"""
        請根據 {transcript_text} 生成三個問題，主要與學科有關，不要問跟情節故事相關的問題
        並用 JSON 格式返回 questions_answers: [{{question: q1的敘述text, answer: q1的答案text}}, ...]
        k-v pair 的 key 是 question, value 是 answer
    """
    chat_messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]

    messages_banner = "=====messages====="
    print(messages_banner)
    print(chat_messages)
    print(messages_banner)

    # response_format asks the API for a JSON object so the reply can be
    # decoded directly with json.loads below.
    completion = OPEN_AI_CLIENT.chat.completions.create(
        model="gpt-4-turbo",
        messages=chat_messages,
        max_tokens=4000,
        response_format={"type": "json_object"},
    )
    reply = json.loads(completion.choices[0].message.content)
    qa_pairs = reply["questions_answers"]

    response_banner = "=====json_response====="
    print(response_banner)
    print(qa_pairs)
    print(response_banner)

    return qa_pairs
|
1163 |
+
|
1164 |
+
|
1165 |
def change_questions(password, df_string):
|
1166 |
verify_password(password)
|
1167 |
|
|
|
1614 |
questions_text = json.dumps(questions_json, ensure_ascii=False, indent=2)
|
1615 |
upload_file_to_gcs_with_json_string(gcs_client, bucket_name, blob_name, questions_text)
|
1616 |
updated_content = questions_text
|
1617 |
+
elif kind == "questions_answers":
|
1618 |
+
# from update_LLM_btn -> new_content is a string
|
1619 |
+
# create_LLM_content -> new_content is a list
|
1620 |
+
if isinstance(new_content, str):
|
1621 |
+
questions_answers_json = json.loads(new_content)
|
1622 |
+
else:
|
1623 |
+
questions_answers_json = new_content
|
1624 |
+
questions_answers_text = json.dumps(questions_answers_json, ensure_ascii=False, indent=2)
|
1625 |
+
upload_file_to_gcs_with_json_string(gcs_client, bucket_name, blob_name, questions_answers_text)
|
1626 |
+
updated_content = questions_answers_text
|
1627 |
|
1628 |
print(f"{kind} 已更新到GCS")
|
1629 |
return gr.update(value=updated_content, interactive=False)
|
|
|
1649 |
transcript = df_string
|
1650 |
formatted_simple_transcript = create_formatted_simple_transcript(transcript)
|
1651 |
formatted_transcript = create_formatted_transcript(video_id, transcript)
|
1652 |
+
gen_content = generate_key_moments(formatted_simple_transcript, formatted_transcript)
|
1653 |
+
update_LLM_content(video_id, gen_content, kind)
|
1654 |
+
content = json.dumps(gen_content, ensure_ascii=False, indent=2)
|
1655 |
elif kind == "transcript":
|
1656 |
+
gen_content = process_transcript_and_screenshots_on_gcs(video_id)
|
1657 |
+
update_LLM_content(video_id, gen_content, kind)
|
1658 |
+
content = json.dumps(gen_content, ensure_ascii=False, indent=2)
|
1659 |
elif kind == "questions":
|
1660 |
+
gen_content = generate_questions(df_string)
|
1661 |
+
update_LLM_content(video_id, gen_content, kind)
|
1662 |
+
content = json.dumps(gen_content, ensure_ascii=False, indent=2)
|
1663 |
+
elif kind == "questions_answers":
|
1664 |
+
gen_content = generate_questions_answers(df_string)
|
1665 |
+
update_LLM_content(video_id, gen_content, kind)
|
1666 |
+
content = json.dumps(gen_content, ensure_ascii=False, indent=2)
|
1667 |
|
1668 |
return gr.update(value=content, interactive=False)
|
1669 |
|
|
|
1790 |
|
1791 |
return new_summary
|
1792 |
|
|
|
|
|
1793 |
|
1794 |
# AI 生成教學素材
|
1795 |
def get_meta_data(video_id, source="gcs"):
|
|
|
2759 |
questions_create_button = gr.Button("重建", size="sm", variant="primary")
|
2760 |
with gr.Row():
|
2761 |
questions_json = gr.Textbox(label="Questions", lines=40, interactive=False, show_copy_button=True)
|
2762 |
+
with gr.Tab("問題答案本文"):
|
2763 |
+
with gr.Row() as questions_answers_admin:
|
2764 |
+
questions_answers_kind = gr.Textbox(value="questions_answers", show_label=False)
|
2765 |
+
questions_answers_get_button = gr.Button("取得", size="sm", variant="primary")
|
2766 |
+
questions_answers_edit_button = gr.Button("編輯", size="sm", variant="primary")
|
2767 |
+
questions_answers_update_button = gr.Button("儲存", size="sm", variant="primary")
|
2768 |
+
questions_answers_delete_button = gr.Button("刪除", size="sm", variant="primary")
|
2769 |
+
questions_answers_create_button = gr.Button("重建", size="sm", variant="primary")
|
2770 |
+
with gr.Row():
|
2771 |
+
questions_answers_json = gr.Textbox(label="Questions Answers", lines=40, interactive=False, show_copy_button=True)
|
2772 |
with gr.Tab("逐字稿"):
|
2773 |
simple_html_content = gr.HTML(label="Simple Transcript")
|
2774 |
with gr.Tab("圖文"):
|
|
|
3090 |
inputs=[video_id, questions_json, questions_kind],
|
3091 |
outputs=[questions_json]
|
3092 |
)
|
3093 |
+
# questions_answers event
|
3094 |
+
questions_answers_get_button.click(
|
3095 |
+
get_LLM_content,
|
3096 |
+
inputs=[video_id, questions_answers_kind],
|
3097 |
+
outputs=[questions_answers_json]
|
3098 |
+
)
|
3099 |
+
questions_answers_create_button.click(
|
3100 |
+
create_LLM_content,
|
3101 |
+
inputs=[video_id, df_string_output, questions_answers_kind],
|
3102 |
+
outputs=[questions_answers_json]
|
3103 |
+
)
|
3104 |
+
questions_answers_delete_button.click(
|
3105 |
+
delete_LLM_content,
|
3106 |
+
inputs=[video_id, questions_answers_kind],
|
3107 |
+
outputs=[questions_answers_json]
|
3108 |
+
)
|
3109 |
+
questions_answers_edit_button.click(
|
3110 |
+
enable_edit_mode,
|
3111 |
+
inputs=[],
|
3112 |
+
outputs=[questions_answers_json]
|
3113 |
+
)
|
3114 |
+
questions_answers_update_button.click(
|
3115 |
+
update_LLM_content,
|
3116 |
+
inputs=[video_id, questions_answers_json, questions_answers_kind],
|
3117 |
+
outputs=[questions_answers_json]
|
3118 |
+
)
|
3119 |
+
|
3120 |
|
3121 |
# 教師版
|
3122 |
worksheet_content_btn.click(
|