youngtsai committed on
Commit
28353ce
·
1 Parent(s): b96d724
Files changed (2) hide show
  1. app.py +91 -18
  2. requirements.txt +2 -2
app.py CHANGED
@@ -34,10 +34,9 @@ from googleapiclient.http import MediaIoBaseUpload
34
 
35
  from educational_material import EducationalMaterial
36
  from storage_service import GoogleCloudStorage
37
- from google.cloud import aiplatform
38
- from vertexai.preview.generative_models import GenerativeModel
39
  from google.oauth2.service_account import Credentials
40
-
 
41
 
42
  import boto3
43
 
@@ -102,12 +101,11 @@ GOOGPE_SCOPES = ["https://www.googleapis.com/auth/cloud-platform"]
102
  google_creds = Credentials.from_service_account_info(
103
  google_service_account_info_dict, scopes=GOOGPE_SCOPES
104
  )
105
- aiplatform.init(
106
  project="junyiacademy",
107
  service_account=google_service_account_info_dict,
108
  credentials=google_creds,
109
  )
110
- GEMINI_MODEL = GenerativeModel("gemini-pro")
111
 
112
  # CLIENTS CONFIG
113
  GBQ_CLIENT = bigquery.Client.from_service_account_info(json.loads(GBQ_KEY))
@@ -789,7 +787,9 @@ def generate_content_by_open_ai(sys_content, user_content, response_format=None,
789
  def generate_content_by_gemini(sys_content, user_content, response_format=None, model_name=None):
790
  print("generate_content_by_gemini")
791
  print(f"LLM using: {model_name}")
792
- model_response = GEMINI_MODEL.generate_content(
 
 
793
  f"{sys_content}, {user_content}"
794
  )
795
  content = model_response.candidates[0].content.parts[0].text
@@ -799,8 +799,9 @@ def generate_content_by_gemini(sys_content, user_content, response_format=None,
799
  def generate_content_by_LLM(sys_content, user_content, response_format=None, LLM_model=None, model_name=None):
800
  # 使用 OpenAI 生成基于上传数据的问题
801
 
802
- if LLM_model == "gemini-pro":
803
  print(f"LLM: {LLM_model}")
 
804
  content = generate_content_by_gemini(sys_content, user_content, response_format, model_name=model_name)
805
  # elif LLM_model == "anthropic-claude-3-sonnet":
806
  # print(f"LLM: {LLM_model}")
@@ -2175,7 +2176,7 @@ def download_exam_result(content):
2175
  return word_path
2176
 
2177
  # ---- Chatbot ----
2178
- def get_instructions(content_subject, content_grade, key_moments, socratic_mode=True):
2179
  if socratic_mode:
2180
  method = "Socratic style, guide thinking, no direct answers. this is very important, please be seriously following."
2181
  else:
@@ -2185,18 +2186,59 @@ def get_instructions(content_subject, content_grade, key_moments, socratic_mode=
2185
  subject: {content_subject}
2186
  grade: {content_grade}
2187
  context: {key_moments}
 
2188
  Assistant Role: you are a {content_subject} assistant. you can call yourself as {content_subject} 學伴
2189
  User Role: {content_grade} th-grade student.
2190
  Method: {method}
2191
  Language: Traditional Chinese ZH-TW (it's very important), suitable for {content_grade} th-grade level.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2192
  Response:
2193
  - if user say hi or hello or any greeting, just say hi back and introduce yourself. Then tell user to ask question in context.
2194
- - Single question, under 100 characters
2195
  - include math symbols (use LaTeX $ to cover before and after, ex: $x^2$)
2196
  - hint with video timestamp which format 【參考:00:00:00】.
2197
- - Sometimes encourage user by Taiwanese style with relaxing atmosphere.
2198
- - if user ask questions not include in context,
2199
- - just tell them to ask the question in context and give them example question.
2200
  Restrictions:
2201
  - Answer within video content, no external references
2202
  - don't repeat user's question, guide them to think more.
@@ -2309,8 +2351,19 @@ def chat_with_any_ai(ai_type, password, video_id, user_data, transcript_state, k
2309
  moment.pop('images', None)
2310
  moment.pop('end', None)
2311
  moment.pop('transcript', None)
 
 
 
 
 
 
 
 
 
 
 
2312
  key_moments_text = json.dumps(key_moments_json, ensure_ascii=False)
2313
- instructions = get_instructions(content_subject, content_grade, key_moments_text, socratic_mode)
2314
  print(f"=== instructions:{instructions} ===")
2315
  user_message_note = "/n 請嚴格遵循instructions,擔任一位蘇格拉底家教,絕對不要重複 user 的問句,請用引導的方式指引方向,請一定要用繁體中文回答 zh-TW,並用台灣人的禮貌口語表達,回答時不要特別說明這是台灣人的語氣,請在回答的最後標註【參考:(時):(分):(秒)】,(如果是反問學生,就只問一個問題,請幫助學生更好的理解資料,字數在100字以內,回答時如果講到數學專有名詞,請用數學符號代替文字(Latex 用 $ 字號 render, ex: $x^2$)"
2316
  user_content = user_message + user_message_note
@@ -2383,9 +2436,19 @@ def get_chatbot_config(ai_name, transcript_state, key_moments, content_subject,
2383
  moment.pop('images', None)
2384
  moment.pop('end', None)
2385
  moment.pop('transcript', None)
 
2386
  key_moments_text = json.dumps(key_moments_json, ensure_ascii=False)
2387
 
2388
- instructions = get_instructions(content_subject, content_grade, key_moments_text, socratic_mode)
 
 
 
 
 
 
 
 
 
2389
 
2390
  chatbot_config = {
2391
  "video_id": video_id,
@@ -2689,7 +2752,7 @@ def poll_run_status(run_id, thread_id, timeout=600, poll_interval=5):
2689
 
2690
  return run.status
2691
 
2692
- def chat_with_opan_ai_assistant_streaming(user_message, chat_history, password, video_id, user_data, thread_id, trascript, key_moments, content_subject, content_grade, socratic_mode=True):
2693
  verify_password(password)
2694
 
2695
  print("=====user_data=====")
@@ -2741,9 +2804,19 @@ def chat_with_opan_ai_assistant_streaming(user_message, chat_history, password,
2741
  moment.pop('images', None)
2742
  moment.pop('end', None)
2743
  moment.pop('transcript', None)
 
2744
  key_moments_text = json.dumps(key_moments_json, ensure_ascii=False)
2745
 
2746
- instructions = get_instructions(content_subject, content_grade, key_moments_text, socratic_mode)
 
 
 
 
 
 
 
 
 
2747
  # 创建线程
2748
  if not thread_id:
2749
  thread = client.beta.threads.create()
@@ -2997,7 +3070,7 @@ with gr.Blocks(theme=gr.themes.Base(primary_hue=gr.themes.colors.orange, seconda
2997
  )
2998
  ai_chatbot_ai_type = gr.Textbox(value="chat_completions", visible=True)
2999
  ai_chatbot_thread_id = gr.Textbox(label="thread_id", visible=True)
3000
- ai_chatbot_socratic_mode_btn = gr.Checkbox(label="蘇格拉底家教助理模式", value=True, visible=True)
3001
  latex_delimiters = [{"left": "$", "right": "$", "display": False}]
3002
  with gr.Accordion("選擇 AI 小精靈", elem_id="chatbot_select_accordion") as chatbot_select_accordion:
3003
  with gr.Row():
@@ -3227,7 +3300,7 @@ with gr.Blocks(theme=gr.themes.Base(primary_hue=gr.themes.colors.orange, seconda
3227
  with gr.Accordion("See Details", open=False) as see_details:
3228
  with gr.Row():
3229
  is_env_prod = gr.Checkbox(value=False, label="is_env_prod")
3230
- LLM_model = gr.Dropdown(label="LLM Model", choices=["open-ai-gpt-4o", "anthropic-claude-3-sonnet", "gemini-pro"], value="open-ai-gpt-4o", visible=True, interactive=True)
3231
  with gr.Tab("逐字稿本文"):
3232
  with gr.Row() as transcript_admmin:
3233
  transcript_kind = gr.Textbox(value="transcript", show_label=False)
 
34
 
35
  from educational_material import EducationalMaterial
36
  from storage_service import GoogleCloudStorage
 
 
37
  from google.oauth2.service_account import Credentials
38
+ import vertexai
39
+ from vertexai.generative_models import GenerativeModel, Part
40
 
41
  import boto3
42
 
 
101
  google_creds = Credentials.from_service_account_info(
102
  google_service_account_info_dict, scopes=GOOGPE_SCOPES
103
  )
104
+ vertexai.init(
105
  project="junyiacademy",
106
  service_account=google_service_account_info_dict,
107
  credentials=google_creds,
108
  )
 
109
 
110
  # CLIENTS CONFIG
111
  GBQ_CLIENT = bigquery.Client.from_service_account_info(json.loads(GBQ_KEY))
 
787
  def generate_content_by_gemini(sys_content, user_content, response_format=None, model_name=None):
788
  print("generate_content_by_gemini")
789
  print(f"LLM using: {model_name}")
790
+
791
+ gemini_model = GenerativeModel(model_name=model_name)
792
+ model_response = gemini_model.generate_content(
793
  f"{sys_content}, {user_content}"
794
  )
795
  content = model_response.candidates[0].content.parts[0].text
 
799
  def generate_content_by_LLM(sys_content, user_content, response_format=None, LLM_model=None, model_name=None):
800
  # 使用 OpenAI 生成基于上传数据的问题
801
 
802
+ if LLM_model in ["gemini-1.5-pro","gemini-1.5-flash"]:
803
  print(f"LLM: {LLM_model}")
804
+ model_name = LLM_model
805
  content = generate_content_by_gemini(sys_content, user_content, response_format, model_name=model_name)
806
  # elif LLM_model == "anthropic-claude-3-sonnet":
807
  # print(f"LLM: {LLM_model}")
 
2176
  return word_path
2177
 
2178
  # ---- Chatbot ----
2179
+ def get_instructions(content_subject, content_grade, transcript_text, key_moments, socratic_mode=True):
2180
  if socratic_mode:
2181
  method = "Socratic style, guide thinking, no direct answers. this is very important, please be seriously following."
2182
  else:
 
2186
  subject: {content_subject}
2187
  grade: {content_grade}
2188
  context: {key_moments}
2189
+ transcript_text: {transcript_text}
2190
  Assistant Role: you are a {content_subject} assistant. you can call yourself as {content_subject} 學伴
2191
  User Role: {content_grade} th-grade student.
2192
  Method: {method}
2193
  Language: Traditional Chinese ZH-TW (it's very important), suitable for {content_grade} th-grade level.
2194
+ Strategy:
2195
+ - You are a professional tutor, and you will use the following teaching strategies based on the textbook content.
2196
+ # General Strategies
2197
+ Needs Analysis:
2198
+ The tutor/assistant teacher should be able to conduct dynamic needs analysis based on the student's responses. Use questions to understand the student's needs and difficulties.
2199
+ Example questions: "What do you want to learn today?" or "What difficulties are you encountering in this part of the content?"
2200
+
2201
+ Dynamic Goal Setting:
2202
+ Set learning goals based on student feedback, which can be short-term or long-term. The tutor/assistant teacher can adjust the plan automatically according to the student's progress.
2203
+ Example questions: "What is our goal for this week?" or "What tasks do you hope to complete today?"
2204
+
2205
+ Flexible Teaching Methods:
2206
+ Provide different teaching methods and resources based on the student's age and learning style. The tutor/assistant teacher can adjust teaching strategies based on student feedback.
2207
+ Example questions: "Do you prefer learning through videos or reading materials?" or "We can understand this problem through examples, what do you think?"
2208
+
2209
+ Patience and Encouragement:
2210
+ Provide positive feedback and encouragement, especially when students encounter difficulties. The tutor/assistant teacher should be able to detect the student's emotions and provide appropriate support.
2211
+ Example questions: "Don't worry, let's try again." or "You did well, keep it up!"
2212
+
2213
+ Regular Feedback and Evaluation:
2214
+ Regularly evaluate the student's learning progress and provide feedback. The tutor/assistant teacher can use tests and practice questions to assess the student's understanding.
2215
+ Example questions: "Let's check your progress." or "How do you feel about your learning progress during this period?"
2216
+
2217
+ Good Communication Skills:
2218
+ Maintain good communication with students, responding to their questions and needs in a timely manner. The tutor/assistant teacher should be able to identify and solve students' problems.
2219
+ Example questions: "Is there any problem that you need my help with?" or "Is this part clear to you?"
2220
+
2221
+ Maintaining Professionalism:
2222
+ Continue learning and improving teaching skills, and maintain punctuality and responsibility. The tutor/assistant teacher should provide accurate and up-to-date information.
2223
+ Example questions: "What is our learning goal for today?" or "Remember to study a little bit every day, and gradually accumulate knowledge."
2224
+
2225
+ Creating a Positive Learning Environment:
2226
+ Create a positive, supportive, and motivating learning atmosphere. The tutor/assistant teacher should suggest students take breaks and relax at appropriate times.
2227
+ Example questions: "Let's take a break and continue studying afterward." or "How do you feel about this learning environment? Do we need any adjustments?"
2228
+
2229
+ # Specific Applications
2230
+ The tutor/assistant teacher can automatically adjust the depth and complexity of the questions based on these general strategies by grade. For example:
2231
+ - Kindergarten and Elementary School Students: Use simple vocabulary and concrete examples, with more pictures and gamified content.
2232
+ - Middle School Students: Use interactive and practical methods, such as quizzes and group discussions.
2233
+ - High School Students: Use deep learning and critical thinking exercises, such as project research and discussions.
2234
+ - Adult Learners: Emphasize practical applications and work-related content, such as case studies and workshops.
2235
+
2236
  Response:
2237
  - if user say hi or hello or any greeting, just say hi back and introduce yourself. Then tell user to ask question in context.
 
2238
  - include math symbols (use LaTeX $ to cover before and after, ex: $x^2$)
2239
  - hint with video timestamp which format 【參考:00:00:00】.
2240
+ - Sometimes encourage user with relaxing atmosphere.
2241
+ - if user ask questions not include in context, just tell them to ask the question in context and give them example question.
 
2242
  Restrictions:
2243
  - Answer within video content, no external references
2244
  - don't repeat user's question, guide them to think more.
 
2351
  moment.pop('images', None)
2352
  moment.pop('end', None)
2353
  moment.pop('transcript', None)
2354
+ moment.pop('suggested_images', None)
2355
+
2356
+ if isinstance(transcript_state, str):
2357
+ transcript_state_json = json.loads(transcript_state)
2358
+ else:
2359
+ transcript_state_json = transcript_state
2360
+ # remain only text
2361
+ transcript_text = ""
2362
+ for content in transcript_state_json:
2363
+ transcript_text += content["text"] + ","
2364
+
2365
  key_moments_text = json.dumps(key_moments_json, ensure_ascii=False)
2366
+ instructions = get_instructions(content_subject, content_grade, transcript_text, key_moments_text, socratic_mode)
2367
  print(f"=== instructions:{instructions} ===")
2368
  user_message_note = "/n 請嚴格遵循instructions,擔任一位蘇格拉底家教,絕對不要重複 user 的問句,請用引導的方式指引方向,請一定要用繁體中文回答 zh-TW,並用台灣人的禮貌口語表達,回答時不要特別說明這是台灣人的語氣,請在回答的最後標註【參考:(時):(分):(秒)】,(如果是反問學生,就只問一個問題,請幫助學生更好的理解資料,字數在100字以內,回答時如果講到數學專有名詞,請用數學符號代替文字(Latex 用 $ 字號 render, ex: $x^2$)"
2369
  user_content = user_message + user_message_note
 
2436
  moment.pop('images', None)
2437
  moment.pop('end', None)
2438
  moment.pop('transcript', None)
2439
+ moment.pop('suggested_images', None)
2440
  key_moments_text = json.dumps(key_moments_json, ensure_ascii=False)
2441
 
2442
+ if isinstance(transcript_state, str):
2443
+ transcript_state_json = json.loads(transcript_state)
2444
+ else:
2445
+ transcript_state_json = transcript_state
2446
+ # remain only text
2447
+ transcript_text = ""
2448
+ for content in transcript_state_json:
2449
+ transcript_text += content["text"] + ","
2450
+
2451
+ instructions = get_instructions(content_subject, content_grade, transcript_text, key_moments_text, socratic_mode)
2452
 
2453
  chatbot_config = {
2454
  "video_id": video_id,
 
2752
 
2753
  return run.status
2754
 
2755
+ def chat_with_opan_ai_assistant_streaming(user_message, chat_history, password, video_id, user_data, thread_id, transcript_state, key_moments, content_subject, content_grade, socratic_mode=True):
2756
  verify_password(password)
2757
 
2758
  print("=====user_data=====")
 
2804
  moment.pop('images', None)
2805
  moment.pop('end', None)
2806
  moment.pop('transcript', None)
2807
+ moment.pop('suggested_images', None)
2808
  key_moments_text = json.dumps(key_moments_json, ensure_ascii=False)
2809
 
2810
+ if isinstance(transcript_state, str):
2811
+ transcript_state_json = json.loads(transcript_state)
2812
+ else:
2813
+ transcript_state_json = transcript_state
2814
+ # remain only text
2815
+ transcript_text = ""
2816
+ for content in transcript_state_json:
2817
+ transcript_text += content["text"] + ","
2818
+
2819
+ instructions = get_instructions(content_subject, content_grade, transcript_text, key_moments_text, socratic_mode)
2820
  # 创建线程
2821
  if not thread_id:
2822
  thread = client.beta.threads.create()
 
3070
  )
3071
  ai_chatbot_ai_type = gr.Textbox(value="chat_completions", visible=True)
3072
  ai_chatbot_thread_id = gr.Textbox(label="thread_id", visible=True)
3073
+ ai_chatbot_socratic_mode_btn = gr.Checkbox(label="蘇格拉底家教助理模式", value=False, visible=True)
3074
  latex_delimiters = [{"left": "$", "right": "$", "display": False}]
3075
  with gr.Accordion("選擇 AI 小精靈", elem_id="chatbot_select_accordion") as chatbot_select_accordion:
3076
  with gr.Row():
 
3300
  with gr.Accordion("See Details", open=False) as see_details:
3301
  with gr.Row():
3302
  is_env_prod = gr.Checkbox(value=False, label="is_env_prod")
3303
+ LLM_model = gr.Dropdown(label="LLM Model", choices=["open-ai-gpt-4o", "anthropic-claude-3-sonnet", "gemini-1.5-pro", "gemini-1.5-flash"], value="open-ai-gpt-4o", visible=True, interactive=True)
3304
  with gr.Tab("逐字稿本文"):
3305
  with gr.Row() as transcript_admmin:
3306
  transcript_kind = gr.Textbox(value="transcript", show_label=False)
requirements.txt CHANGED
@@ -12,11 +12,11 @@ google-auth-httplib2
12
  google-auth-oauthlib
13
  google-cloud-storage
14
  google-cloud-bigquery
15
- google-cloud-aiplatform
16
  groq
17
  yt_dlp
18
  uuid
19
  gtts
20
- boto3
21
  pydub
22
  vertexai
 
12
  google-auth-oauthlib
13
  google-cloud-storage
14
  google-cloud-bigquery
15
+ google-generativeai
16
  groq
17
  yt_dlp
18
  uuid
19
  gtts
20
+ boto3==1.28.57
21
  pydub
22
  vertexai