Spaces:
Running
Running
claude-3 instead of open AI
Browse files
app.py
CHANGED
@@ -678,19 +678,44 @@ def generate_reading_passage(df_string):
|
|
678 |
敘述中,請把數學或是專業術語,用 Latex 包覆($...$),並且不要去改原本的文章
|
679 |
加減乘除、根號、次方等等的運算式口語也換成 LATEX 數學符號
|
680 |
"""
|
681 |
-
messages = [
|
682 |
-
{"role": "system", "content": sys_content},
|
683 |
-
{"role": "user", "content": user_content}
|
684 |
-
]
|
685 |
|
686 |
-
|
687 |
-
|
688 |
-
|
689 |
-
|
690 |
-
|
|
|
691 |
|
692 |
-
|
693 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
694 |
print("=====reading_passage=====")
|
695 |
print(reading_passage)
|
696 |
print("=====reading_passage=====")
|
@@ -755,19 +780,43 @@ def generate_mind_map(df_string):
|
|
755 |
注意:不需要前後文敘述,直接給出 markdown 文本即可
|
756 |
這對我很重要
|
757 |
"""
|
758 |
-
messages = [
|
759 |
-
{"role": "system", "content": sys_content},
|
760 |
-
{"role": "user", "content": user_content}
|
761 |
-
]
|
762 |
|
763 |
-
|
764 |
-
|
765 |
-
|
766 |
-
|
767 |
-
|
|
|
768 |
|
769 |
-
|
770 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
771 |
print("=====mind_map=====")
|
772 |
print(mind_map)
|
773 |
print("=====mind_map=====")
|
@@ -891,19 +940,44 @@ def generate_summarise(df_string, metadata=None):
|
|
891 |
# 💡 5. 結論反思(為什麼我們要學這個?)
|
892 |
# ❓ 6. 延伸小問題
|
893 |
|
894 |
-
|
895 |
-
|
896 |
-
|
897 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
898 |
|
899 |
-
request_payload = {
|
900 |
-
"model": "gpt-4-turbo",
|
901 |
-
"messages": messages,
|
902 |
-
"max_tokens": 4000,
|
903 |
-
}
|
904 |
|
905 |
-
response = OPEN_AI_CLIENT.chat.completions.create(**request_payload)
|
906 |
-
df_summarise = response.choices[0].message.content.strip()
|
907 |
print("=====df_summarise=====")
|
908 |
print(df_summarise)
|
909 |
print("=====df_summarise=====")
|
@@ -975,26 +1049,50 @@ def generate_questions(df_string):
|
|
975 |
|
976 |
sys_content = "你是一個擅長資料分析跟影片教學的老師,user 為學生,請精讀資料文本,自行判斷資料的種類,並用既有資料為本質猜測用戶可能會問的問題,使用 zh-TW"
|
977 |
user_content = f"請根據 {content_text} 生成三個問題,並用 JSON 格式返回 questions:[q1的敘述text, q2的敘述text, q3的敘述text]"
|
978 |
-
|
979 |
-
|
980 |
-
|
981 |
-
|
982 |
-
|
|
|
|
|
983 |
|
984 |
-
|
985 |
-
|
986 |
-
|
987 |
|
988 |
|
989 |
-
|
990 |
-
|
991 |
-
|
992 |
-
|
993 |
-
|
994 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
995 |
|
996 |
-
response = OPEN_AI_CLIENT.chat.completions.create(**request_payload)
|
997 |
-
questions = json.loads(response.choices[0].message.content)["questions"]
|
998 |
print("=====json_response=====")
|
999 |
print(questions)
|
1000 |
print("=====json_response=====")
|
@@ -1182,30 +1280,53 @@ def generate_key_moments(formatted_simple_transcript, formatted_transcript):
|
|
1182 |
"keywords": ["關鍵字", "關鍵字"]
|
1183 |
}}]
|
1184 |
"""
|
1185 |
-
messages = [
|
1186 |
-
{"role": "system", "content": sys_content},
|
1187 |
-
{"role": "user", "content": user_content}
|
1188 |
-
]
|
1189 |
-
response_format = { "type": "json_object" }
|
1190 |
-
|
1191 |
-
request_payload = {
|
1192 |
-
"model": "gpt-4-turbo",
|
1193 |
-
"messages": messages,
|
1194 |
-
"max_tokens": 4096,
|
1195 |
-
"response_format": response_format
|
1196 |
-
}
|
1197 |
|
1198 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1199 |
response = OPEN_AI_CLIENT.chat.completions.create(**request_payload)
|
1200 |
print("===response===")
|
1201 |
print(dict(response))
|
1202 |
key_moments = json.loads(response.choices[0].message.content)["key_moments"]
|
1203 |
except Exception as e:
|
1204 |
-
error_msg = f" {video_id} 關鍵時刻錯誤: {str(e)}"
|
1205 |
print("===generate_key_moments error===")
|
1206 |
print(error_msg)
|
1207 |
print("===generate_key_moments error===")
|
1208 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1209 |
|
1210 |
print("=====key_moments=====")
|
1211 |
print(key_moments)
|
@@ -1229,18 +1350,43 @@ def generate_key_moments_keywords(transcript):
|
|
1229 |
不用給上下文,直接給出關鍵字,使用 zh-TW,用逗號分隔, example: 關鍵字1, 關鍵字2
|
1230 |
transcript:{transcript}
|
1231 |
"""
|
1232 |
-
messages = [
|
1233 |
-
{"role": "system", "content": system_content},
|
1234 |
-
{"role": "user", "content": user_content}
|
1235 |
-
]
|
1236 |
-
request_payload = {
|
1237 |
-
"model": "gpt-4-turbo",
|
1238 |
-
"messages": messages,
|
1239 |
-
"max_tokens": 100,
|
1240 |
-
}
|
1241 |
|
1242 |
-
|
1243 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1244 |
|
1245 |
return keywords
|
1246 |
|
@@ -1599,7 +1745,6 @@ def create_LLM_content(video_id, df_string, kind):
|
|
1599 |
def reading_passage_add_latex_version(video_id):
|
1600 |
# 確認 GCS 是否有 reading_passage.json
|
1601 |
print("===reading_passage_convert_to_latex===")
|
1602 |
-
gcs_client = GCS_CLIENT
|
1603 |
bucket_name = 'video_ai_assistant'
|
1604 |
file_name = f'{video_id}_reading_passage.json'
|
1605 |
blob_name = f"{video_id}/{file_name}"
|
@@ -1652,7 +1797,6 @@ def reading_passage_add_latex_version(video_id):
|
|
1652 |
def summary_add_markdown_version(video_id):
|
1653 |
# 確認 GCS 是否有 summary.json
|
1654 |
print("===summary_convert_to_markdown===")
|
1655 |
-
gcs_client = GCS_CLIENT
|
1656 |
bucket_name = 'video_ai_assistant'
|
1657 |
file_name = f'{video_id}_summary.json'
|
1658 |
blob_name = f"{video_id}/{file_name}"
|
@@ -1826,7 +1970,7 @@ def generate_ai_content(password, df_string, topic, grade, level, specific_featu
|
|
1826 |
request_payload = {
|
1827 |
"model": ai_model_name,
|
1828 |
"messages": messages,
|
1829 |
-
"max_tokens": 4000
|
1830 |
}
|
1831 |
ai_content = material.send_ai_request(OPEN_AI_CLIENT, request_payload)
|
1832 |
return ai_content, prompt
|
|
|
678 |
敘述中,請把數學或是專業術語,用 Latex 包覆($...$),並且不要去改原本的文章
|
679 |
加減乘除、根號、次方等等的運算式口語也換成 LATEX 數學符號
|
680 |
"""
|
|
|
|
|
|
|
|
|
681 |
|
682 |
+
try:
|
683 |
+
# 使用 OPEN AI 生成 Reading Passage
|
684 |
+
messages = [
|
685 |
+
{"role": "system", "content": sys_content},
|
686 |
+
{"role": "user", "content": user_content}
|
687 |
+
]
|
688 |
|
689 |
+
request_payload = {
|
690 |
+
"model": "gpt-4-turbo",
|
691 |
+
"messages": messages,
|
692 |
+
"max_tokens": 4000,
|
693 |
+
}
|
694 |
+
|
695 |
+
response = OPEN_AI_CLIENT.chat.completions.create(**request_payload)
|
696 |
+
reading_passage = response.choices[0].message.content.strip()
|
697 |
+
except:
|
698 |
+
# 使用 BEDROCK 生成 Reading Passage
|
699 |
+
messages = [
|
700 |
+
{"role": "user", "content": user_content}
|
701 |
+
]
|
702 |
+
model_id = "anthropic.claude-3-sonnet-20240229-v1:0"
|
703 |
+
# model_id = "anthropic.claude-3-haiku-20240307-v1:0"
|
704 |
+
kwargs = {
|
705 |
+
"modelId": model_id,
|
706 |
+
"contentType": "application/json",
|
707 |
+
"accept": "application/json",
|
708 |
+
"body": json.dumps({
|
709 |
+
"anthropic_version": "bedrock-2023-05-31",
|
710 |
+
"max_tokens": 4000,
|
711 |
+
"system": sys_content,
|
712 |
+
"messages": messages
|
713 |
+
})
|
714 |
+
}
|
715 |
+
response = BEDROCK_CLIENT.invoke_model(**kwargs)
|
716 |
+
response_body = json.loads(response.get('body').read())
|
717 |
+
reading_passage = response_body.get('content')[0].get('text')
|
718 |
+
|
719 |
print("=====reading_passage=====")
|
720 |
print(reading_passage)
|
721 |
print("=====reading_passage=====")
|
|
|
780 |
注意:不需要前後文敘述,直接給出 markdown 文本即可
|
781 |
這對我很重要
|
782 |
"""
|
|
|
|
|
|
|
|
|
783 |
|
784 |
+
try:
|
785 |
+
# 使用 OPEN AI 生成
|
786 |
+
messages = [
|
787 |
+
{"role": "system", "content": sys_content},
|
788 |
+
{"role": "user", "content": user_content}
|
789 |
+
]
|
790 |
|
791 |
+
request_payload = {
|
792 |
+
"model": "gpt-4-turbo",
|
793 |
+
"messages": messages,
|
794 |
+
"max_tokens": 4000,
|
795 |
+
}
|
796 |
+
|
797 |
+
response = OPEN_AI_CLIENT.chat.completions.create(**request_payload)
|
798 |
+
mind_map = response.choices[0].message.content.strip()
|
799 |
+
except:
|
800 |
+
# 使用 BEDROCK 生成
|
801 |
+
messages = [
|
802 |
+
{"role": "user", "content": user_content}
|
803 |
+
]
|
804 |
+
model_id = "anthropic.claude-3-sonnet-20240229-v1:0"
|
805 |
+
# model_id = "anthropic.claude-3-haiku-20240307-v1:0"
|
806 |
+
kwargs = {
|
807 |
+
"modelId": model_id,
|
808 |
+
"contentType": "application/json",
|
809 |
+
"accept": "application/json",
|
810 |
+
"body": json.dumps({
|
811 |
+
"anthropic_version": "bedrock-2023-05-31",
|
812 |
+
"max_tokens": 4000,
|
813 |
+
"system": sys_content,
|
814 |
+
"messages": messages
|
815 |
+
})
|
816 |
+
}
|
817 |
+
response = BEDROCK_CLIENT.invoke_model(**kwargs)
|
818 |
+
response_body = json.loads(response.get('body').read())
|
819 |
+
mind_map = response_body.get('content')[0].get('text')
|
820 |
print("=====mind_map=====")
|
821 |
print(mind_map)
|
822 |
print("=====mind_map=====")
|
|
|
940 |
# 💡 5. 結論反思(為什麼我們要學這個?)
|
941 |
# ❓ 6. 延伸小問題
|
942 |
|
943 |
+
try:
|
944 |
+
#OPEN AI
|
945 |
+
messages = [
|
946 |
+
{"role": "system", "content": sys_content},
|
947 |
+
{"role": "user", "content": user_content}
|
948 |
+
]
|
949 |
+
|
950 |
+
request_payload = {
|
951 |
+
"model": "gpt-4-turbo",
|
952 |
+
"messages": messages,
|
953 |
+
"max_tokens": 4000,
|
954 |
+
}
|
955 |
+
|
956 |
+
response = OPEN_AI_CLIENT.chat.completions.create(**request_payload)
|
957 |
+
df_summarise = response.choices[0].message.content.strip()
|
958 |
+
except:
|
959 |
+
#BEDROCK
|
960 |
+
messages = [
|
961 |
+
{"role": "user", "content": user_content}
|
962 |
+
]
|
963 |
+
model_id = "anthropic.claude-3-sonnet-20240229-v1:0"
|
964 |
+
# model_id = "anthropic.claude-3-haiku-20240307-v1:0"
|
965 |
+
kwargs = {
|
966 |
+
"modelId": model_id,
|
967 |
+
"contentType": "application/json",
|
968 |
+
"accept": "application/json",
|
969 |
+
"body": json.dumps({
|
970 |
+
"anthropic_version": "bedrock-2023-05-31",
|
971 |
+
"max_tokens": 4000,
|
972 |
+
"system": sys_content,
|
973 |
+
"messages": messages
|
974 |
+
})
|
975 |
+
}
|
976 |
+
response = BEDROCK_CLIENT.invoke_model(**kwargs)
|
977 |
+
response_body = json.loads(response.get('body').read())
|
978 |
+
df_summarise = response_body.get('content')[0].get('text')
|
979 |
|
|
|
|
|
|
|
|
|
|
|
980 |
|
|
|
|
|
981 |
print("=====df_summarise=====")
|
982 |
print(df_summarise)
|
983 |
print("=====df_summarise=====")
|
|
|
1049 |
|
1050 |
sys_content = "你是一個擅長資料分析跟影片教學的老師,user 為學生,請精讀資料文本,自行判斷資料的種類,並用既有資料為本質猜測用戶可能會問的問題,使用 zh-TW"
|
1051 |
user_content = f"請根據 {content_text} 生成三個問題,並用 JSON 格式返回 questions:[q1的敘述text, q2的敘述text, q3的敘述text]"
|
1052 |
+
|
1053 |
+
try:
|
1054 |
+
messages = [
|
1055 |
+
{"role": "system", "content": sys_content},
|
1056 |
+
{"role": "user", "content": user_content}
|
1057 |
+
]
|
1058 |
+
response_format = { "type": "json_object" }
|
1059 |
|
1060 |
+
print("=====messages=====")
|
1061 |
+
print(messages)
|
1062 |
+
print("=====messages=====")
|
1063 |
|
1064 |
|
1065 |
+
request_payload = {
|
1066 |
+
"model": "gpt-4-turbo",
|
1067 |
+
"messages": messages,
|
1068 |
+
"max_tokens": 4000,
|
1069 |
+
"response_format": response_format
|
1070 |
+
}
|
1071 |
+
|
1072 |
+
response = OPEN_AI_CLIENT.chat.completions.create(**request_payload)
|
1073 |
+
questions = json.loads(response.choices[0].message.content)["questions"]
|
1074 |
+
except:
|
1075 |
+
messages = [
|
1076 |
+
{"role": "user", "content": user_content}
|
1077 |
+
]
|
1078 |
+
model_id = "anthropic.claude-3-sonnet-20240229-v1:0"
|
1079 |
+
# model_id = "anthropic.claude-3-haiku-20240307-v1:0"
|
1080 |
+
kwargs = {
|
1081 |
+
"modelId": model_id,
|
1082 |
+
"contentType": "application/json",
|
1083 |
+
"accept": "application/json",
|
1084 |
+
"body": json.dumps({
|
1085 |
+
"anthropic_version": "bedrock-2023-05-31",
|
1086 |
+
"max_tokens": 4000,
|
1087 |
+
"system": sys_content,
|
1088 |
+
"messages": messages
|
1089 |
+
})
|
1090 |
+
}
|
1091 |
+
response = BEDROCK_CLIENT.invoke_model(**kwargs)
|
1092 |
+
response_body = json.loads(response.get('body').read())
|
1093 |
+
response_completion = response_body.get('content')[0].get('text')
|
1094 |
+
questions = json.loads(response_completion)["questions"]
|
1095 |
|
|
|
|
|
1096 |
print("=====json_response=====")
|
1097 |
print(questions)
|
1098 |
print("=====json_response=====")
|
|
|
1280 |
"keywords": ["關鍵字", "關鍵字"]
|
1281 |
}}]
|
1282 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1283 |
|
1284 |
try:
|
1285 |
+
#OPEN AI
|
1286 |
+
messages = [
|
1287 |
+
{"role": "system", "content": sys_content},
|
1288 |
+
{"role": "user", "content": user_content}
|
1289 |
+
]
|
1290 |
+
response_format = { "type": "json_object" }
|
1291 |
+
|
1292 |
+
request_payload = {
|
1293 |
+
"model": "gpt-4-turbo",
|
1294 |
+
"messages": messages,
|
1295 |
+
"max_tokens": 4096,
|
1296 |
+
"response_format": response_format
|
1297 |
+
}
|
1298 |
+
|
1299 |
response = OPEN_AI_CLIENT.chat.completions.create(**request_payload)
|
1300 |
print("===response===")
|
1301 |
print(dict(response))
|
1302 |
key_moments = json.loads(response.choices[0].message.content)["key_moments"]
|
1303 |
except Exception as e:
|
1304 |
+
error_msg = f" {video_id} OPEN AI 關鍵時刻錯誤: {str(e)}"
|
1305 |
print("===generate_key_moments error===")
|
1306 |
print(error_msg)
|
1307 |
print("===generate_key_moments error===")
|
1308 |
+
|
1309 |
+
#BEDROCK
|
1310 |
+
messages = [
|
1311 |
+
{"role": "user", "content": user_content}
|
1312 |
+
]
|
1313 |
+
model_id = "anthropic.claude-3-sonnet-20240229-v1:0"
|
1314 |
+
# model_id = "anthropic.claude-3-haiku-20240307-v1:0"
|
1315 |
+
kwargs = {
|
1316 |
+
"modelId": model_id,
|
1317 |
+
"contentType": "application/json",
|
1318 |
+
"accept": "application/json",
|
1319 |
+
"body": json.dumps({
|
1320 |
+
"anthropic_version": "bedrock-2023-05-31",
|
1321 |
+
"max_tokens": 4096,
|
1322 |
+
"system": sys_content,
|
1323 |
+
"messages": messages
|
1324 |
+
})
|
1325 |
+
}
|
1326 |
+
response = BEDROCK_CLIENT.invoke_model(**kwargs)
|
1327 |
+
response_body = json.loads(response.get('body').read())
|
1328 |
+
response_completion = response_body.get('content')[0].get('text')
|
1329 |
+
key_moments = json.loads(response_completion)["key_moments"]
|
1330 |
|
1331 |
print("=====key_moments=====")
|
1332 |
print(key_moments)
|
|
|
1350 |
不用給上下文,直接給出關鍵字,使用 zh-TW,用逗號分隔, example: 關鍵字1, 關鍵字2
|
1351 |
transcript:{transcript}
|
1352 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1353 |
|
1354 |
+
try:
|
1355 |
+
# OPEN AI
|
1356 |
+
messages = [
|
1357 |
+
{"role": "system", "content": system_content},
|
1358 |
+
{"role": "user", "content": user_content}
|
1359 |
+
]
|
1360 |
+
request_payload = {
|
1361 |
+
"model": "gpt-4-turbo",
|
1362 |
+
"messages": messages,
|
1363 |
+
"max_tokens": 100,
|
1364 |
+
}
|
1365 |
+
|
1366 |
+
response = OPEN_AI_CLIENT.chat.completions.create(**request_payload)
|
1367 |
+
keywords = response.choices[0].message.content.strip().split(", ")
|
1368 |
+
except:
|
1369 |
+
# BEDROCK
|
1370 |
+
messages = [
|
1371 |
+
{"role": "user", "content": user_content}
|
1372 |
+
]
|
1373 |
+
model_id = "anthropic.claude-3-sonnet-20240229-v1:0"
|
1374 |
+
# model_id = "anthropic.claude-3-haiku-20240307-v1:0"
|
1375 |
+
kwargs = {
|
1376 |
+
"modelId": model_id,
|
1377 |
+
"contentType": "application/json",
|
1378 |
+
"accept": "application/json",
|
1379 |
+
"body": json.dumps({
|
1380 |
+
"anthropic_version": "bedrock-2023-05-31",
|
1381 |
+
"max_tokens": 100,
|
1382 |
+
"system": system_content,
|
1383 |
+
"messages": messages
|
1384 |
+
})
|
1385 |
+
}
|
1386 |
+
response = BEDROCK_CLIENT.invoke_model(**kwargs)
|
1387 |
+
response_body = json.loads(response.get('body').read())
|
1388 |
+
response_completion = response_body.get('content')[0].get('text')
|
1389 |
+
keywords = response_completion.strip().split(", ")
|
1390 |
|
1391 |
return keywords
|
1392 |
|
|
|
1745 |
def reading_passage_add_latex_version(video_id):
|
1746 |
# 確認 GCS 是否有 reading_passage.json
|
1747 |
print("===reading_passage_convert_to_latex===")
|
|
|
1748 |
bucket_name = 'video_ai_assistant'
|
1749 |
file_name = f'{video_id}_reading_passage.json'
|
1750 |
blob_name = f"{video_id}/{file_name}"
|
|
|
1797 |
def summary_add_markdown_version(video_id):
|
1798 |
# 確認 GCS 是否有 summary.json
|
1799 |
print("===summary_convert_to_markdown===")
|
|
|
1800 |
bucket_name = 'video_ai_assistant'
|
1801 |
file_name = f'{video_id}_summary.json'
|
1802 |
blob_name = f"{video_id}/{file_name}"
|
|
|
1970 |
request_payload = {
|
1971 |
"model": ai_model_name,
|
1972 |
"messages": messages,
|
1973 |
+
"max_tokens": 4000
|
1974 |
}
|
1975 |
ai_content = material.send_ai_request(OPEN_AI_CLIENT, request_payload)
|
1976 |
return ai_content, prompt
|