update
- app.py +91 -18
- requirements.txt +2 -2
app.py
CHANGED
@@ -34,10 +34,9 @@ from googleapiclient.http import MediaIoBaseUpload
 
 from educational_material import EducationalMaterial
 from storage_service import GoogleCloudStorage
-from google.cloud import aiplatform
-from vertexai.preview.generative_models import GenerativeModel
 from google.oauth2.service_account import Credentials
-
+import vertexai
+from vertexai.generative_models import GenerativeModel, Part
 
 import boto3
 
@@ -102,12 +101,11 @@ GOOGPE_SCOPES = ["https://www.googleapis.com/auth/cloud-platform"]
 google_creds = Credentials.from_service_account_info(
     google_service_account_info_dict, scopes=GOOGPE_SCOPES
 )
-aiplatform.init(
+vertexai.init(
     project="junyiacademy",
     service_account=google_service_account_info_dict,
     credentials=google_creds,
 )
-GEMINI_MODEL = GenerativeModel("gemini-pro")
 
 # CLIENTS CONFIG
 GBQ_CLIENT = bigquery.Client.from_service_account_info(json.loads(GBQ_KEY))
@@ -789,7 +787,9 @@ def generate_content_by_open_ai(sys_content, user_content, response_format=None,
 def generate_content_by_gemini(sys_content, user_content, response_format=None, model_name=None):
     print("generate_content_by_gemini")
     print(f"LLM using: {model_name}")
-    model_response = GEMINI_MODEL.generate_content(
+
+    gemini_model = GenerativeModel(model_name=model_name)
+    model_response = gemini_model.generate_content(
         f"{sys_content}, {user_content}"
     )
     content = model_response.candidates[0].content.parts[0].text
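Taken together, the hunks above replace the old module-level GEMINI_MODEL (built once from the preview SDK) with the GA vertexai flow: call vertexai.init once at startup, then construct a GenerativeModel per request from the caller-supplied model_name. Below is a minimal standalone sketch of that flow, not code from this commit; the project, location, and prompt are placeholder values.

```python
# Minimal sketch of the vertexai call path this commit adopts.
# Placeholder values; app.py passes service-account credentials to init().
import vertexai
from vertexai.generative_models import GenerativeModel

vertexai.init(project="my-gcp-project", location="us-central1")

# GenerativeModel is a lightweight client wrapper, so building one per
# request with a caller-supplied model name is cheap.
model = GenerativeModel(model_name="gemini-1.5-flash")
response = model.generate_content("Explain the Pythagorean theorem in one sentence.")
print(response.candidates[0].content.parts[0].text)
```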
@@ -799,8 +799,9 @@ def generate_content_by_LLM(sys_content, user_content, response_format=None, LLM_model=None, model_name=None):
 def generate_content_by_LLM(sys_content, user_content, response_format=None, LLM_model=None, model_name=None):
     # Use OpenAI to generate questions based on the uploaded data
 
-    if LLM_model == "gemini-pro":
+    if LLM_model in ["gemini-1.5-pro","gemini-1.5-flash"]:
         print(f"LLM: {LLM_model}")
+        model_name = LLM_model
         content = generate_content_by_gemini(sys_content, user_content, response_format, model_name=model_name)
     # elif LLM_model == "anthropic-claude-3-sonnet":
     #     print(f"LLM: {LLM_model}")
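The dispatch above reuses the dropdown string as the Vertex AI model name, so supporting another Gemini release only requires adding it to this allow-list and to the gr.Dropdown choices at the bottom of the diff. A hypothetical distillation of the pattern (names are illustrative, not from app.py):

```python
# Illustrative only: the UI's model string doubles as the Vertex AI model
# name when it appears in the Gemini allow-list; anything else falls through
# to the OpenAI/Claude branches.
SUPPORTED_GEMINI = ("gemini-1.5-pro", "gemini-1.5-flash")

def resolve_gemini_model(LLM_model):
    """Return the Vertex AI model name, or None if another provider handles it."""
    return LLM_model if LLM_model in SUPPORTED_GEMINI else None

assert resolve_gemini_model("gemini-1.5-flash") == "gemini-1.5-flash"
assert resolve_gemini_model("open-ai-gpt-4o") is None
```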
@@ -2175,7 +2176,7 @@ def download_exam_result(content):
     return word_path
 
 # ---- Chatbot ----
-def get_instructions(content_subject, content_grade, key_moments, socratic_mode=True):
+def get_instructions(content_subject, content_grade, transcript_text, key_moments, socratic_mode=True):
     if socratic_mode:
         method = "Socratic style, guide thinking, no direct answers. this is very important, please be seriously following."
     else:
@@ -2185,18 +2186,59 @@ def get_instructions(content_subject, content_grade, key_moments, socratic_mode=
     subject: {content_subject}
     grade: {content_grade}
     context: {key_moments}
+    transcript_text: {transcript_text}
     Assistant Role: you are a {content_subject} assistant. you can call yourself as {content_subject} 學伴
     User Role: {content_grade} th-grade student.
     Method: {method}
     Language: Traditional Chinese ZH-TW (it's very important), suitable for {content_grade} th-grade level.
+    Strategy:
+    - You are a professional tutor, and you will use the following teaching strategies based on the textbook content.
+    # General Strategies
+    Needs Analysis:
+    The tutor/assistant teacher should be able to conduct dynamic needs analysis based on the student's responses. Use questions to understand the student's needs and difficulties.
+    Example questions: "What do you want to learn today?" or "What difficulties are you encountering in this part of the content?"
+
+    Dynamic Goal Setting:
+    Set learning goals based on student feedback, which can be short-term or long-term. The tutor/assistant teacher can adjust the plan automatically according to the student's progress.
+    Example questions: "What is our goal for this week?" or "What tasks do you hope to complete today?"
+
+    Flexible Teaching Methods:
+    Provide different teaching methods and resources based on the student's age and learning style. The tutor/assistant teacher can adjust teaching strategies based on student feedback.
+    Example questions: "Do you prefer learning through videos or reading materials?" or "We can understand this problem through examples, what do you think?"
+
+    Patience and Encouragement:
+    Provide positive feedback and encouragement, especially when students encounter difficulties. The tutor/assistant teacher should be able to detect the student's emotions and provide appropriate support.
+    Example questions: "Don't worry, let's try again." or "You did well, keep it up!"
+
+    Regular Feedback and Evaluation:
+    Regularly evaluate the student's learning progress and provide feedback. The tutor/assistant teacher can use tests and practice questions to assess the student's understanding.
+    Example questions: "Let's check your progress." or "How do you feel about your learning progress during this period?"
+
+    Good Communication Skills:
+    Maintain good communication with students, responding to their questions and needs in a timely manner. The tutor/assistant teacher should be able to identify and solve students' problems.
+    Example questions: "Is there any problem that you need my help with?" or "Is this part clear to you?"
+
+    Maintaining Professionalism:
+    Continue learning and improving teaching skills, and maintain punctuality and responsibility. The tutor/assistant teacher should provide accurate and up-to-date information.
+    Example questions: "What is our learning goal for today?" or "Remember to study a little bit every day, and gradually accumulate knowledge."
+
+    Creating a Positive Learning Environment:
+    Create a positive, supportive, and motivating learning atmosphere. The tutor/assistant teacher should suggest students take breaks and relax at appropriate times.
+    Example questions: "Let's take a break and continue studying afterward." or "How do you feel about this learning environment? Do we need any adjustments?"
+
+    # Specific Applications
+    The tutor/assistant teacher can automatically adjust the depth and complexity of the questions based on these general strategies by grade. For example:
+    - Kindergarten and Elementary School Students: Use simple vocabulary and concrete examples, with more pictures and gamified content.
+    - Middle School Students: Use interactive and practical methods, such as quizzes and group discussions.
+    - High School Students: Use deep learning and critical thinking exercises, such as project research and discussions.
+    - Adult Learners: Emphasize practical applications and work-related content, such as case studies and workshops.
+
     Response:
     - if user say hi or hello or any greeting, just say hi back and introduce yourself. Then tell user to ask question in context.
-    - Single question, under 100 characters
     - include math symbols (use LaTeX $ to cover before and after, ex: $x^2$)
     - hint with video timestamp which format 【參考:00:00:00】.
-    - Sometimes encourage user
-    - if user ask questions not include in context,
-    - just tell them to ask the question in context and give them example question.
+    - Sometimes encourage user with relaxing atmosphere.
+    - if user ask questions not include in context, just tell them to ask the question in context and give them example question.
     Restrictions:
     - Answer within video content, no external references
     - don't repeat user's question, guide them to think more.
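With the signature change in the previous hunk, the prompt template gains a transcript_text: {transcript_text} line, so the flattened transcript rides along with key_moments into the system instructions. A toy sketch of how such an f-string template picks up the new argument (a trimmed-down analogue, not the full template above):

```python
def get_instructions(content_subject, content_grade, transcript_text, key_moments, socratic_mode=True):
    # Trimmed-down analogue of the template in the hunk above.
    method = "Socratic style, guide thinking" if socratic_mode else "direct answers"
    return f"""
    subject: {content_subject}
    grade: {content_grade}
    context: {key_moments}
    transcript_text: {transcript_text}
    Method: {method}
    """

# Invented sample values, just to show the wiring.
print(get_instructions("math", 8, "a^2 + b^2 = c^2", '[{"start": "00:00:00"}]'))
```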
@@ -2309,8 +2351,19 @@ def chat_with_any_ai(ai_type, password, video_id, user_data, transcript_state, k
         moment.pop('images', None)
         moment.pop('end', None)
         moment.pop('transcript', None)
+        moment.pop('suggested_images', None)
+
+    if isinstance(transcript_state, str):
+        transcript_state_json = json.loads(transcript_state)
+    else:
+        transcript_state_json = transcript_state
+    # remain only text
+    transcript_text = ""
+    for content in transcript_state_json:
+        transcript_text += content["text"] + ","
+
     key_moments_text = json.dumps(key_moments_json, ensure_ascii=False)
-    instructions = get_instructions(content_subject, content_grade, key_moments_text, socratic_mode)
+    instructions = get_instructions(content_subject, content_grade, transcript_text, key_moments_text, socratic_mode)
     print(f"=== instructions:{instructions} ===")
     user_message_note = "/n 請嚴格遵循instructions,擔任一位蘇格拉底家教,絕對不要重複 user 的問句,請用引導的方式指引方向,請一定要用繁體中文回答 zh-TW,並用台灣人的禮貌口語表達,回答時不要特別說明這是台灣人的語氣,請在回答的最後標註【參考:(時):(分):(秒)】,(如果是反問學生,就只問一個問題,請幫助學生更好的理解資料,字數在100字以內,回答時如果講到數學專有名詞,請用數學符號代替文字(Latex 用 $ 字號 render, ex: $x^2$)"
     user_content = user_message + user_message_note
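The same transcript-flattening block recurs in the next two hunks: transcript_state may arrive either as a JSON string or as an already-parsed list, and only each segment's "text" field is kept. A self-contained sketch with invented segment data; the ",".join in the last line is equivalent to the += loop minus the trailing comma it leaves behind:

```python
import json

# Invented transcript_state in the shape the hunk expects: a JSON array of
# segments, each carrying a "text" field (other keys are ignored).
transcript_state = json.dumps([
    {"text": "畢氏定理", "start": 0.0},
    {"text": "$a^2 + b^2 = c^2$", "start": 4.2},
], ensure_ascii=False)

# Accept either the raw JSON string or an already-parsed list, as in the diff.
segments = json.loads(transcript_state) if isinstance(transcript_state, str) else transcript_state

# Keep only the text of each segment.
transcript_text = ",".join(seg["text"] for seg in segments)
print(transcript_text)  # 畢氏定理,$a^2 + b^2 = c^2$
```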
@@ -2383,9 +2436,19 @@ def get_chatbot_config(ai_name, transcript_state, key_moments, content_subject,
         moment.pop('images', None)
         moment.pop('end', None)
         moment.pop('transcript', None)
+        moment.pop('suggested_images', None)
     key_moments_text = json.dumps(key_moments_json, ensure_ascii=False)
 
-    instructions = get_instructions(content_subject, content_grade, key_moments_text, socratic_mode)
+    if isinstance(transcript_state, str):
+        transcript_state_json = json.loads(transcript_state)
+    else:
+        transcript_state_json = transcript_state
+    # remain only text
+    transcript_text = ""
+    for content in transcript_state_json:
+        transcript_text += content["text"] + ","
+
+    instructions = get_instructions(content_subject, content_grade, transcript_text, key_moments_text, socratic_mode)
 
     chatbot_config = {
         "video_id": video_id,
@@ -2689,7 +2752,7 @@ def poll_run_status(run_id, thread_id, timeout=600, poll_interval=5):
 
     return run.status
 
-def chat_with_opan_ai_assistant_streaming(user_message, chat_history, password, video_id, user_data, thread_id,
+def chat_with_opan_ai_assistant_streaming(user_message, chat_history, password, video_id, user_data, thread_id, transcript_state, key_moments, content_subject, content_grade, socratic_mode=True):
     verify_password(password)
 
     print("=====user_data=====")
@@ -2741,9 +2804,19 @@ def chat_with_opan_ai_assistant_streaming(user_message, chat_history, password,
         moment.pop('images', None)
         moment.pop('end', None)
         moment.pop('transcript', None)
+        moment.pop('suggested_images', None)
     key_moments_text = json.dumps(key_moments_json, ensure_ascii=False)
 
-    instructions = get_instructions(content_subject, content_grade, key_moments_text, socratic_mode)
+    if isinstance(transcript_state, str):
+        transcript_state_json = json.loads(transcript_state)
+    else:
+        transcript_state_json = transcript_state
+    # remain only text
+    transcript_text = ""
+    for content in transcript_state_json:
+        transcript_text += content["text"] + ","
+
+    instructions = get_instructions(content_subject, content_grade, transcript_text, key_moments_text, socratic_mode)
     # Create a thread
     if not thread_id:
         thread = client.beta.threads.create()
@@ -2997,7 +3070,7 @@ with gr.Blocks(theme=gr.themes.Base(primary_hue=gr.themes.colors.orange, seconda
             )
             ai_chatbot_ai_type = gr.Textbox(value="chat_completions", visible=True)
             ai_chatbot_thread_id = gr.Textbox(label="thread_id", visible=True)
-            ai_chatbot_socratic_mode_btn = gr.Checkbox(label="蘇格拉底家教助理模式", value=
+            ai_chatbot_socratic_mode_btn = gr.Checkbox(label="蘇格拉底家教助理模式", value=False, visible=True)
             latex_delimiters = [{"left": "$", "right": "$", "display": False}]
             with gr.Accordion("選擇 AI 小精靈", elem_id="chatbot_select_accordion") as chatbot_select_accordion:
                 with gr.Row():
@@ -3227,7 +3300,7 @@ with gr.Blocks(theme=gr.themes.Base(primary_hue=gr.themes.colors.orange, seconda
     with gr.Accordion("See Details", open=False) as see_details:
         with gr.Row():
             is_env_prod = gr.Checkbox(value=False, label="is_env_prod")
-            LLM_model = gr.Dropdown(label="LLM Model", choices=["open-ai-gpt-4o", "anthropic-claude-3-sonnet", "gemini-pro"], value="open-ai-gpt-4o", visible=True, interactive=True)
+            LLM_model = gr.Dropdown(label="LLM Model", choices=["open-ai-gpt-4o", "anthropic-claude-3-sonnet", "gemini-1.5-pro", "gemini-1.5-flash"], value="open-ai-gpt-4o", visible=True, interactive=True)
     with gr.Tab("逐字稿本文"):
         with gr.Row() as transcript_admmin:
             transcript_kind = gr.Textbox(value="transcript", show_label=False)
requirements.txt
CHANGED
@@ -12,11 +12,11 @@ google-auth-httplib2
 google-auth-oauthlib
 google-cloud-storage
 google-cloud-bigquery
-google-
+google-generativeai
 groq
 yt_dlp
 uuid
 gtts
-boto3
+boto3==1.28.57
 pydub
 vertexai