Spaces:
Running
Running
has_keywords_added = True
Browse files
- app.py +3 -0
- chatbot.py +15 -29
app.py
CHANGED
@@ -1088,11 +1088,14 @@ def get_key_moments(video_id, formatted_simple_transcript, formatted_transcript,
|
|
1088 |
print("===keywords===")
|
1089 |
print(key_moment["keywords"])
|
1090 |
print("===keywords===")
|
|
|
1091 |
if has_keywords_added:
|
1092 |
key_moments_text = json.dumps(key_moments_json, ensure_ascii=False, indent=2)
|
1093 |
upload_file_to_gcs_with_json_string(gcs_client, bucket_name, blob_name, key_moments_text)
|
1094 |
key_moments_text = download_blob_to_string(gcs_client, bucket_name, blob_name)
|
1095 |
key_moments_json = json.loads(key_moments_text)
|
|
|
|
|
1096 |
|
1097 |
elif source == "drive":
|
1098 |
print("===get_key_moments on drive===")
|
|
|
1088 |
print("===keywords===")
|
1089 |
print(key_moment["keywords"])
|
1090 |
print("===keywords===")
|
1091 |
+
has_keywords_added = True
|
1092 |
if has_keywords_added:
|
1093 |
key_moments_text = json.dumps(key_moments_json, ensure_ascii=False, indent=2)
|
1094 |
upload_file_to_gcs_with_json_string(gcs_client, bucket_name, blob_name, key_moments_text)
|
1095 |
key_moments_text = download_blob_to_string(gcs_client, bucket_name, blob_name)
|
1096 |
key_moments_json = json.loads(key_moments_text)
|
1097 |
+
print("======key_moments_json=====")
|
1098 |
+
print(key_moments_json)
|
1099 |
|
1100 |
elif source == "drive":
|
1101 |
print("===get_key_moments on drive===")
|
chatbot.py
CHANGED
@@ -31,6 +31,8 @@ class Chatbot:
|
|
31 |
# key_moments_json remove images
|
32 |
for moment in key_moments_json:
|
33 |
moment.pop('images', None)
|
|
|
|
|
34 |
|
35 |
key_moments_text = json.dumps(key_moments_json, ensure_ascii=False)
|
36 |
return key_moments_text
|
@@ -54,38 +56,22 @@ class Chatbot:
|
|
54 |
socratic_mode = str(socratic_mode)
|
55 |
ai_name = self.ai_name
|
56 |
|
57 |
-
# string with maximum length 32768
|
58 |
-
# if transcript_text is too long, replace by key_moments_text
|
59 |
-
if len(transcript_text) > 25000:
|
60 |
-
content_text = key_moments_text
|
61 |
-
print("=== transcript_text is too long, replace by key_moments_text ===")
|
62 |
-
else:
|
63 |
-
content_text = transcript_text
|
64 |
-
print("=== transcript_text is used ===")
|
65 |
-
|
66 |
system_prompt = f"""
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-------------------------------------
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
- 請一定要用繁體中文回答 zh-TW,並用台灣人的口語表達,回答時不用特別說明這是台灣人的語氣,也不用說這是「台語的說法」
|
80 |
-
- 請用 {content_grade} 的學生能懂的方式回答,不用提到「逐字稿」這個詞
|
81 |
-
- 如果學生問了一些問題你無法判斷,請告訴學生你無法判斷,並建議學生可以問其他問題
|
82 |
-
- 或者你可以反問學生一些問題,幫助學生更好的理解資料,字數在100字以內
|
83 |
-
- 如果學生的問題與資料文本無關,請告訴學生你「無法回答超出影片範圍的問題」,並告訴他可以怎麼問什麼樣的問題(一個就好)
|
84 |
-
- 回答範圍一定要在逐字稿資料內,不要引用其他資料,請嚴格執行
|
85 |
-
- 並給予學生鼓勵,讓學生有學習的動力
|
86 |
-
- 回答時數學式請用數學符號代替文字(Latex 用 $ 字號 render)
|
87 |
-
- 只要是參考逐字稿資料,please use the timestamp format and give only one reference, example:【參考資料:00:00:00】
|
88 |
"""
|
|
|
|
|
89 |
|
90 |
return system_prompt
|
91 |
|
|
|
31 |
# key_moments_json remove images
|
32 |
for moment in key_moments_json:
|
33 |
moment.pop('images', None)
|
34 |
+
moment.pop('end', None)
|
35 |
+
moment.pop('transcript', None)
|
36 |
|
37 |
key_moments_text = json.dumps(key_moments_json, ensure_ascii=False)
|
38 |
return key_moments_text
|
|
|
56 |
socratic_mode = str(socratic_mode)
|
57 |
ai_name = self.ai_name
|
58 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
59 |
system_prompt = f"""
|
60 |
+
subject: {content_subject}
|
61 |
+
grade: {content_grade}
|
62 |
+
context: {key_moments_text}
|
63 |
-------------------------------------
|
64 |
+
Role: {content_subject} teacher, {content_grade} th-grade student.
|
65 |
+
Method: Socratic style, guide thinking, no direct answers. this is very important, please be seriously following.
|
66 |
+
Language: Traditional Chinese ZH-TW (it's very important), suitable for {content_grade} th-grade level.
|
67 |
+
Response: Single question, under 100 characters, include math symbols (use LaTeX $), hint with video timestamp which format 【00:00:00】.
|
68 |
+
Sometimes encourage user by Taiwanese tone.
|
69 |
+
if user ask questions not include in key_moments_text,
|
70 |
+
just tell them to ask the question in context and give them example question.
|
71 |
+
Restrictions: Answer within video content, no external references
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
72 |
"""
|
73 |
+
print("====system_prompt====")
|
74 |
+
print(system_prompt)
|
75 |
|
76 |
return system_prompt
|
77 |
|