youngtsai committed on
Commit
b1a24f7
·
1 Parent(s): ac3e380

def generate_questions_answers(df_string):

Browse files
Files changed (1) hide show
  1. app.py +129 -11
app.py CHANGED
@@ -1093,6 +1093,75 @@ def generate_questions(df_string):
1093
 
1094
  return questions
1095
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1096
  def change_questions(password, df_string):
1097
  verify_password(password)
1098
 
@@ -1545,6 +1614,16 @@ def update_LLM_content(video_id, new_content, kind):
1545
  questions_text = json.dumps(questions_json, ensure_ascii=False, indent=2)
1546
  upload_file_to_gcs_with_json_string(gcs_client, bucket_name, blob_name, questions_text)
1547
  updated_content = questions_text
 
 
 
 
 
 
 
 
 
 
1548
 
1549
  print(f"{kind} 已更新到GCS")
1550
  return gr.update(value=updated_content, interactive=False)
@@ -1570,17 +1649,21 @@ def create_LLM_content(video_id, df_string, kind):
1570
  transcript = df_string
1571
  formatted_simple_transcript = create_formatted_simple_transcript(transcript)
1572
  formatted_transcript = create_formatted_transcript(video_id, transcript)
1573
- content = generate_key_moments(formatted_simple_transcript, formatted_transcript)
1574
- update_LLM_content(video_id, content, kind)
1575
- content = json.dumps(content, ensure_ascii=False, indent=2)
1576
  elif kind == "transcript":
1577
- content = process_transcript_and_screenshots_on_gcs(video_id)
1578
- update_LLM_content(video_id, content, kind)
1579
- content = json.dumps(content, ensure_ascii=False, indent=2)
1580
  elif kind == "questions":
1581
- content = generate_questions(df_string)
1582
- update_LLM_content(video_id, content, kind)
1583
- content = json.dumps(content, ensure_ascii=False, indent=2)
 
 
 
 
1584
 
1585
  return gr.update(value=content, interactive=False)
1586
 
@@ -1707,8 +1790,6 @@ def summary_add_markdown_version(video_id):
1707
 
1708
  return new_summary
1709
 
1710
-
1711
-
1712
 
1713
  # AI 生成教學素材
1714
  def get_meta_data(video_id, source="gcs"):
@@ -2678,6 +2759,16 @@ with gr.Blocks(theme=gr.themes.Base(primary_hue=gr.themes.colors.orange, seconda
2678
  questions_create_button = gr.Button("重建", size="sm", variant="primary")
2679
  with gr.Row():
2680
  questions_json = gr.Textbox(label="Questions", lines=40, interactive=False, show_copy_button=True)
 
 
 
 
 
 
 
 
 
 
2681
  with gr.Tab("逐字稿"):
2682
  simple_html_content = gr.HTML(label="Simple Transcript")
2683
  with gr.Tab("圖文"):
@@ -2999,6 +3090,33 @@ with gr.Blocks(theme=gr.themes.Base(primary_hue=gr.themes.colors.orange, seconda
2999
  inputs=[video_id, questions_json, questions_kind],
3000
  outputs=[questions_json]
3001
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3002
 
3003
  # 教師版
3004
  worksheet_content_btn.click(
 
1093
 
1094
  return questions
1095
 
1096
def get_questions_answers(video_id, df_string, source):
    """Return the question/answer set for a video, generating it on first use.

    The result is cached in GCS at ``{video_id}/{video_id}_questions_answers.json``
    inside the ``video_ai_assistant`` bucket. If the blob is missing, a new set
    is generated from ``df_string`` and uploaded; otherwise the stored JSON is
    downloaded and parsed.

    Args:
        video_id: Identifier used to build the blob path.
        df_string: Transcript data forwarded to ``generate_questions_answers``
            when nothing is cached yet.
        source: Storage backend selector. Only ``"gcs"`` is implemented.

    Returns:
        A dict. When freshly generated it is ``{"questions_answers": [...]}``;
        when read from GCS it is whatever JSON was stored there (presumably the
        same shape -- verify against other writers of this blob).

    Raises:
        ValueError: If ``source`` is not ``"gcs"``.
    """
    if source != "gcs":
        # Only the GCS backend exists. Without this guard the function would
        # reach the return statement with no result ever assigned.
        raise ValueError(f"Unsupported source for questions_answers: {source!r}")

    print("===get_questions_answers on gcs===")
    gcs_client = GCS_CLIENT
    bucket_name = 'video_ai_assistant'
    file_name = f'{video_id}_questions_answers.json'
    blob_name = f"{video_id}/{file_name}"

    # Check whether the cached file already exists.
    is_questions_answers_exists = GCS_SERVICE.check_file_exists(bucket_name, blob_name)
    if not is_questions_answers_exists:
        # Nothing cached yet: generate, wrap under a single key, and upload.
        questions_answers = generate_questions_answers(df_string)
        questions_answers_json = {"questions_answers": questions_answers}
        questions_answers_text = json.dumps(questions_answers_json, ensure_ascii=False, indent=2)
        upload_file_to_gcs_with_json_string(gcs_client, bucket_name, blob_name, questions_answers_text)
        print("questions_answers已上傳到GCS")
    else:
        # Already cached: download the stored content and parse it.
        print("questions_answers已存在于GCS中")
        questions_answers_text = download_blob_to_string(gcs_client, bucket_name, blob_name)
        questions_answers_json = json.loads(questions_answers_text)

    return questions_answers_json
1118
+
1119
+
1120
def generate_questions_answers(df_string):
    """Ask the OpenAI chat model for three question/answer pairs about a transcript.

    Args:
        df_string: Either a JSON string or an already-parsed iterable of
            entries; each entry must provide a ``"text"`` value.

    Returns:
        The value stored under the ``"questions_answers"`` key of the model's
        JSON reply. The prompt requests a list of ``{question, answer}``
        objects, but the reply is not validated here.

    Raises:
        json.JSONDecodeError: If ``df_string`` or the model reply is not valid JSON.
        KeyError: If an entry has no ``"text"`` key or the reply lacks
            ``"questions_answers"``.
    """
    # Accept both a JSON string and pre-parsed data.
    if isinstance(df_string, str):
        df_string_json = json.loads(df_string)
    else:
        df_string_json = df_string

    # Each entry's text is followed by a comma (including the last one), which
    # keeps the prompt text identical to incremental concatenation.
    content_text = "".join(entry["text"] + "," for entry in df_string_json)

    # JSON FORMAT: [{"question": "問題", "answer": "答案"}, ...]
    sys_content = "你是一個擅長資料分析跟影片教學的老師,user 為學生,請精讀資料文本,自行判斷資料的種類,並用既有資料為本質猜測用戶可能會問的問題,使用 zh-TW"
    user_content = f"""
        請根據 {content_text} 生成三個問題,主要與學科有關,不要問跟情節故事相關的問題
        並用 JSON 格式返回 questions_answers: [{{question: q1的敘述text, answer: q1的答案text}}, ...]
        k-v pair 的 key 是 question, value 是 answer
    """
    messages = [
        {"role": "system", "content": sys_content},
        {"role": "user", "content": user_content},
    ]
    # Request JSON mode so the reply can be parsed with json.loads.
    response_format = {"type": "json_object"}

    print("=====messages=====")
    print(messages)
    print("=====messages=====")

    request_payload = {
        "model": "gpt-4-turbo",
        "messages": messages,
        "max_tokens": 4000,
        "response_format": response_format,
    }

    response = OPEN_AI_CLIENT.chat.completions.create(**request_payload)
    questions_answers = json.loads(response.choices[0].message.content)["questions_answers"]
    print("=====json_response=====")
    print(questions_answers)
    print("=====json_response=====")

    return questions_answers
1163
+
1164
+
1165
  def change_questions(password, df_string):
1166
  verify_password(password)
1167
 
 
1614
  questions_text = json.dumps(questions_json, ensure_ascii=False, indent=2)
1615
  upload_file_to_gcs_with_json_string(gcs_client, bucket_name, blob_name, questions_text)
1616
  updated_content = questions_text
1617
+ elif kind == "questions_answers":
1618
+ # from update_LLM_btn -> new_content is a string
1619
+ # create_LLM_content -> new_content is a list
1620
+ if isinstance(new_content, str):
1621
+ questions_answers_json = json.loads(new_content)
1622
+ else:
1623
+ questions_answers_json = new_content
1624
+ questions_answers_text = json.dumps(questions_answers_json, ensure_ascii=False, indent=2)
1625
+ upload_file_to_gcs_with_json_string(gcs_client, bucket_name, blob_name, questions_answers_text)
1626
+ updated_content = questions_answers_text
1627
 
1628
  print(f"{kind} 已更新到GCS")
1629
  return gr.update(value=updated_content, interactive=False)
 
1649
  transcript = df_string
1650
  formatted_simple_transcript = create_formatted_simple_transcript(transcript)
1651
  formatted_transcript = create_formatted_transcript(video_id, transcript)
1652
+ gen_content = generate_key_moments(formatted_simple_transcript, formatted_transcript)
1653
+ update_LLM_content(video_id, gen_content, kind)
1654
+ content = json.dumps(gen_content, ensure_ascii=False, indent=2)
1655
  elif kind == "transcript":
1656
+ gen_content = process_transcript_and_screenshots_on_gcs(video_id)
1657
+ update_LLM_content(video_id, gen_content, kind)
1658
+ content = json.dumps(gen_content, ensure_ascii=False, indent=2)
1659
  elif kind == "questions":
1660
+ gen_content = generate_questions(df_string)
1661
+ update_LLM_content(video_id, gen_content, kind)
1662
+ content = json.dumps(gen_content, ensure_ascii=False, indent=2)
1663
+ elif kind == "questions_answers":
1664
+ gen_content = generate_questions_answers(df_string)
1665
+ update_LLM_content(video_id, gen_content, kind)
1666
+ content = json.dumps(gen_content, ensure_ascii=False, indent=2)
1667
 
1668
  return gr.update(value=content, interactive=False)
1669
 
 
1790
 
1791
  return new_summary
1792
 
 
 
1793
 
1794
  # AI 生成教學素材
1795
  def get_meta_data(video_id, source="gcs"):
 
2759
  questions_create_button = gr.Button("重建", size="sm", variant="primary")
2760
  with gr.Row():
2761
  questions_json = gr.Textbox(label="Questions", lines=40, interactive=False, show_copy_button=True)
2762
+ with gr.Tab("問題答案本文"):
2763
+ with gr.Row() as questions_answers_admin:
2764
+ questions_answers_kind = gr.Textbox(value="questions_answers", show_label=False)
2765
+ questions_answers_get_button = gr.Button("取得", size="sm", variant="primary")
2766
+ questions_answers_edit_button = gr.Button("編輯", size="sm", variant="primary")
2767
+ questions_answers_update_button = gr.Button("儲存", size="sm", variant="primary")
2768
+ questions_answers_delete_button = gr.Button("刪除", size="sm", variant="primary")
2769
+ questions_answers_create_button = gr.Button("重建", size="sm", variant="primary")
2770
+ with gr.Row():
2771
+ questions_answers_json = gr.Textbox(label="Questions Answers", lines=40, interactive=False, show_copy_button=True)
2772
  with gr.Tab("逐字稿"):
2773
  simple_html_content = gr.HTML(label="Simple Transcript")
2774
  with gr.Tab("圖文"):
 
3090
  inputs=[video_id, questions_json, questions_kind],
3091
  outputs=[questions_json]
3092
  )
3093
+ # questions_answers event
3094
+ questions_answers_get_button.click(
3095
+ get_LLM_content,
3096
+ inputs=[video_id, questions_answers_kind],
3097
+ outputs=[questions_answers_json]
3098
+ )
3099
+ questions_answers_create_button.click(
3100
+ create_LLM_content,
3101
+ inputs=[video_id, df_string_output, questions_answers_kind],
3102
+ outputs=[questions_answers_json]
3103
+ )
3104
+ questions_answers_delete_button.click(
3105
+ delete_LLM_content,
3106
+ inputs=[video_id, questions_answers_kind],
3107
+ outputs=[questions_answers_json]
3108
+ )
3109
+ questions_answers_edit_button.click(
3110
+ enable_edit_mode,
3111
+ inputs=[],
3112
+ outputs=[questions_answers_json]
3113
+ )
3114
+ questions_answers_update_button.click(
3115
+ update_LLM_content,
3116
+ inputs=[video_id, questions_answers_json, questions_answers_kind],
3117
+ outputs=[questions_answers_json]
3118
+ )
3119
+
3120
 
3121
  # 教師版
3122
  worksheet_content_btn.click(