Spaces:

JunyiAcademy
/

vaitor2

Running

App Files Community

youngtsai committed on Apr 19, 2024

Commit

3e2f38e

1 Parent(s): 9089b62

questions_answers = generate_questions_answers(df_string)

Browse files

Files changed (1) hide show

app.py +15 -6

app.py CHANGED Viewed

@@ -1119,15 +1119,19 @@ def generate_questions_answers(df_string):
         df_string_json = json.loads(df_string)
     else:
         df_string_json = df_string
-    content_text = ""
-    for entry in df_string_json:
-        content_text += entry["text"] + "，"
     # JSON FORMAT: [{"question": "問題", "answer": "答案"}, ...]
     sys_content = "你是一個擅長資料分析跟影片教學的老師，user 為學生，請精讀資料文本，自行判斷資料的種類，並用既有資料為本質猜測用戶可能會問的問題，使用 zh-TW"
     user_content = f"""
-        請根據 {content_text} 生成三個問題，主要與學科有關，不要問跟情節故事相關的問題
         並用 JSON 格式返回 questions_answers: [{{question: q1的敘述text, answer: q1的答案text}}, ...]
         k-v pair 的 key 是 question, value 是 answer
     """
@@ -1651,7 +1655,12 @@ def create_LLM_content(video_id, df_string, kind):
         update_LLM_content(video_id, gen_content, kind)
         content = json.dumps(gen_content, ensure_ascii=False, indent=2)
     elif kind == "questions_answers":
-        gen_content = generate_questions_answers(df_string)
         update_LLM_content(video_id, gen_content, kind)
         content = json.dumps(gen_content, ensure_ascii=False, indent=2)

         df_string_json = json.loads(df_string)
     else:
         df_string_json = df_string
+    content_text = json.dumps(df_string_json, ensure_ascii=False, indent=2)
+    print("=====content_text=====")
+    print(content_text)
+    print("=====content_text=====")
     # JSON FORMAT: [{"question": "問題", "answer": "答案"}, ...]
     sys_content = "你是一個擅長資料分析跟影片教學的老師，user 為學生，請精讀資料文本，自行判斷資料的種類，並用既有資料為本質猜測用戶可能會問的問題，使用 zh-TW"
     user_content = f"""
+        請根據 {content_text} 生成三個問題跟答案，主要與學科有關，不要問跟情節故事相關的問題
+        答案要在最後標示出處【參考：00:01:05】，請根據時間軸 start_time 來標示
+        請確保問題跟答案都是繁體中文 zh-TW
+        答案不用是標準答案，而是帶有啟發性的蘇格拉底式問答，讓學生思考本來的問題，以及該去參考的時間點
         並用 JSON 格式返回 questions_answers: [{{question: q1的敘述text, answer: q1的答案text}}, ...]
         k-v pair 的 key 是 question, value 是 answer
     """
         update_LLM_content(video_id, gen_content, kind)
         content = json.dumps(gen_content, ensure_ascii=False, indent=2)
     elif kind == "questions_answers":
+        if isinstance(df_string, str):
+            transcript = json.loads(df_string)
+        else:
+            transcript = df_string
+        formatted_simple_transcript = create_formatted_simple_transcript(transcript)
+        gen_content = generate_questions_answers(formatted_simple_transcript)
         update_LLM_content(video_id, gen_content, kind)
         content = json.dumps(gen_content, ensure_ascii=False, indent=2)