Spaces:

JunyiAcademy
/

vaitor2

Running

App Files Files Community

youngtsai committed on May 13, 2024

Commit

0af8b29

1 Parent(s): 5324cd6

update

Browse files

Files changed (1) hide show

app.py +120 -154

app.py CHANGED Viewed

@@ -503,7 +503,7 @@ def upload_transcript_to_gcs(video_id, transcript):
     GCS_SERVICE.upload_json_string(bucket_name, transcript_blob_name, transcript_text)
     print("Transcript uploaded successfully.")
-def process_youtube_link(password, link):
     verify_password(password)
     video_id = extract_youtube_id(link)
@@ -545,21 +545,21 @@ def process_youtube_link(password, link):
     # 基于逐字稿生成其他所需的输出
     source = "gcs"
-    questions_answers = get_questions_answers(video_id, formatted_simple_transcript, source)
     questions_answers_json = json.dumps(questions_answers, ensure_ascii=False, indent=2)
-    summary_json = get_video_id_summary(video_id, formatted_simple_transcript, source)
     summary_text = summary_json["summary"]
     summary = summary_json["summary"]
-    key_moments_json = get_key_moments(video_id, formatted_simple_transcript, formatted_transcript, source)
     key_moments = key_moments_json["key_moments"]
     key_moments_text = json.dumps(key_moments, ensure_ascii=False, indent=2)
     key_moments_html = get_key_moments_html(key_moments)
     html_content = format_transcript_to_html(formatted_transcript)
     simple_html_content = format_simple_transcript_to_html(formatted_simple_transcript)
-    mind_map_json = get_mind_map(video_id, formatted_simple_transcript, source)
     mind_map = mind_map_json["mind_map"]
     mind_map_html = get_mind_map_html(mind_map)
-    reading_passage_json = get_reading_passage(video_id, formatted_simple_transcript, source)
     reading_passage_text = reading_passage_json["reading_passage"]
     reading_passage = reading_passage_json["reading_passage"]
     meta_data = get_meta_data(video_id)
@@ -703,70 +703,75 @@ def split_data(df_string, word_base=100000):
     return segments
-def generate_content_by_LLM(sys_content, user_content, response_format=None):
-    # 使用 OpenAI 生成基于上传数据的问题
-    try:
-        model = "gpt-4-turbo"
-        # 使用 OPEN AI 生成 Reading Passage
-        messages = [
-            {"role": "system", "content": sys_content},
-            {"role": "user", "content": user_content}
-        ]
-        request_payload = {
-            "model": model,
-            "messages": messages,
-            "max_tokens": 4000,
-            "response_format": response_format
-        }
-        if response_format is not None:
-            request_payload["response_format"] = response_format
-        response = OPEN_AI_CLIENT.chat.completions.create(**request_payload)
-        content = response.choices[0].message.content.strip()
-    except Exception as e:
-        print(f"Error generating reading passage: {str(e)}")
-        print("using REDROCK")
-        # 使用 REDROCK 生成 Reading Passage
-        messages = [
-            {"role": "user", "content": user_content}
-        ]
-        model_id = "anthropic.claude-3-sonnet-20240229-v1:0"
-        # model_id = "anthropic.claude-3-haiku-20240307-v1:0"
-        kwargs = {
-            "modelId": model_id,
-            "contentType": "application/json",
-            "accept": "application/json",
-            "body": json.dumps({
-                "anthropic_version": "bedrock-2023-05-31",
-                "max_tokens": 4000,
-                "system": sys_content,
-                "messages": messages
-            })
-        }
-        response = BEDROCK_CLIENT.invoke_model(**kwargs)
-        response_body = json.loads(response.get('body').read())
-        content = response_body.get('content')[0].get('text')
     print("=====content=====")
     print(content)
     print("=====content=====")
     return content
-def get_reading_passage(video_id, df_string, source):
     if source == "gcs":
         print("===get_reading_passage on gcs===")
-        gcs_client = GCS_CLIENT
         bucket_name = 'video_ai_assistant'
         file_name = f'{video_id}_reading_passage_latex.json'
         blob_name = f"{video_id}/{file_name}"
         # 检查 reading_passage 是否存在
         is_file_exists = GCS_SERVICE.check_file_exists(bucket_name, blob_name)
         if not is_file_exists:
-            reading_passage = generate_reading_passage(df_string)
             reading_passage_json = {"reading_passage": str(reading_passage)}
             reading_passage_text = json.dumps(reading_passage_json, ensure_ascii=False, indent=2)
             GCS_SERVICE.upload_json_string(bucket_name, blob_name, reading_passage_text)
@@ -799,7 +804,7 @@ def get_reading_passage(video_id, df_string, source):
     return reading_passage_json
-def generate_reading_passage(df_string):
     print("===generate_reading_passage===")
     segments = split_data(df_string, word_base=100000)
     all_content = []
@@ -818,7 +823,7 @@ def generate_reading_passage(df_string):
           加減乘除、根號、次方等等的運算式口語也換成 LATEX 數學符號
           請直接給出文章，不用介紹怎麼處理的或是文章字數等等
       """
-        content = generate_content_by_LLM(sys_content, user_content)
         all_content.append(content + "\n")
     # 將所有生成的閱讀理解段落合併成一個完整的文章
@@ -831,7 +836,7 @@ def text_to_speech(video_id, text):
     tts.save(filename)
     return filename
-def get_mind_map(video_id, df_string, source):
     if source == "gcs":
         print("===get_mind_map on gcs===")
         gcs_client = GCS_CLIENT
@@ -841,7 +846,7 @@ def get_mind_map(video_id, df_string, source):
         # 检查檔案是否存在
         is_file_exists = GCS_SERVICE.check_file_exists(bucket_name, blob_name)
         if not is_file_exists:
-            mind_map = generate_mind_map(df_string)
             mind_map_json = {"mind_map": str(mind_map)}
             mind_map_text = json.dumps(mind_map_json, ensure_ascii=False, indent=2)
             GCS_SERVICE.upload_json_string(bucket_name, blob_name, mind_map_text)
@@ -862,7 +867,7 @@ def get_mind_map(video_id, df_string, source):
         # 检查檔案是否存在
         exists, file_id = check_file_exists(service, folder_id, file_name)
         if not exists:
-            mind_map = generate_mind_map(df_string)
             mind_map_json = {"mind_map": str(mind_map)}
             mind_map_text = json.dumps(mind_map_json, ensure_ascii=False, indent=2)
             upload_content_directly(service, file_name, folder_id, mind_map_text)
@@ -875,7 +880,7 @@ def get_mind_map(video_id, df_string, source):
     return mind_map_json
-def generate_mind_map(df_string):
     print("===generate_mind_map===")
     segments = split_data(df_string, word_base=100000)
     all_content = []
@@ -887,7 +892,7 @@ def generate_mind_map(df_string):
             注意：不需要前後文敘述，直接給出 markdown 文本即可
             這對我很重要
         """
-        content = generate_content_by_LLM(sys_content, user_content)
         all_content.append(content + "\n")
     # 將所有生成的閱讀理解段落合併成一個完整的文章
@@ -906,10 +911,9 @@ def get_mind_map_html(mind_map):
     """
     return mind_map_html
-def get_video_id_summary(video_id, df_string, source):
     if source == "gcs":
         print("===get_video_id_summary on gcs===")
-        gcs_client = GCS_CLIENT
         bucket_name = 'video_ai_assistant'
         file_name = f'{video_id}_summary_markdown.json'
         summary_file_blob_name = f"{video_id}/{file_name}"
@@ -917,7 +921,7 @@ def get_video_id_summary(video_id, df_string, source):
         is_summary_file_exists = GCS_SERVICE.check_file_exists(bucket_name, summary_file_blob_name)
         if not is_summary_file_exists:
             meta_data = get_meta_data(video_id)
-            summary = generate_summarise(df_string, meta_data)
             summary_json = {"summary": str(summary)}
             summary_text = json.dumps(summary_json, ensure_ascii=False, indent=2)
             GCS_SERVICE.upload_json_string(bucket_name, summary_file_blob_name, summary_text)
@@ -939,7 +943,7 @@ def get_video_id_summary(video_id, df_string, source):
         exists, file_id = check_file_exists(service, folder_id, file_name)
         if not exists:
             meta_data = get_meta_data(video_id)
-            summary = generate_summarise(df_string, meta_data)
             summary_json = {"summary": str(summary)}
             summary_text = json.dumps(summary_json, ensure_ascii=False, indent=2)
@@ -960,7 +964,7 @@ def get_video_id_summary(video_id, df_string, source):
     return summary_json
-def generate_summarise(df_string, metadata=None):
     print("===generate_summarise===")
     # 使用 OpenAI 生成基于上传数据的问题
     if metadata:
@@ -1008,7 +1012,7 @@ def generate_summarise(df_string, metadata=None):
             ## ❓ 延伸小問題
                 - (一個 bullet point....請圍繞「課程名稱」為學習重點，進行重點整理，不要整理跟情境故事相關的問題)
         """
-        content = generate_content_by_LLM(sys_content, user_content)
         all_content.append(content + "\n")
     if len(all_content) > 1:
@@ -1047,13 +1051,13 @@ def generate_summarise(df_string, metadata=None):
             ## ❓ 延伸小問題
                 - ( {all_content_cnt}  個 bullet point....請圍繞「課程名稱」為學習重點，進行重點整理，不要整理跟情境故事相關的問題)
         """
-        final_content = generate_content_by_LLM(sys_content, user_content)
     else:
         final_content = all_content[0]
     return final_content
-def get_questions(video_id, df_string, source="gcs"):
     if source == "gcs":
         # 去 gcs 確認是有有 video_id_questions.json
         print("===get_questions on gcs===")
@@ -1064,7 +1068,7 @@ def get_questions(video_id, df_string, source="gcs"):
         # 检查檔案是否存在
         is_questions_exists = GCS_SERVICE.check_file_exists(bucket_name, blob_name)
         if not is_questions_exists:
-            questions = generate_questions(df_string)
             questions_text = json.dumps(questions, ensure_ascii=False, indent=2)
             GCS_SERVICE.upload_json_string(bucket_name, blob_name, questions_text)
             print("questions已上傳到GCS")
@@ -1085,7 +1089,7 @@ def get_questions(video_id, df_string, source="gcs"):
         # 检查檔案是否存在
         exists, file_id = check_file_exists(service, folder_id, file_name)
         if not exists:
-            questions = generate_questions(df_string)
             questions_text = json.dumps(questions, ensure_ascii=False, indent=2)
             upload_content_directly(service, file_name, folder_id, questions_text)
             print("questions已上傳到Google Drive")
@@ -1105,7 +1109,7 @@ def get_questions(video_id, df_string, source="gcs"):
     print("=====get_questions=====")
     return q1, q2, q3
-def generate_questions(df_string):
     print("===generate_questions===")
     # 使用 OpenAI 生成基于上传数据的问题
     if isinstance(df_string, str):
@@ -1128,69 +1132,26 @@ def generate_questions(df_string):
             [q1的敘述text, q2的敘述text, q3的敘述text]
         }}
     """
-    try:
-        model = "gpt-4-turbo"
-        messages = [
-            {"role": "system", "content": sys_content},
-            {"role": "user", "content": user_content}
-        ]
-        response_format = { "type": "json_object" }
-        print("=====messages=====")
-        print(messages)
-        print("=====messages=====")
-        request_payload = {
-            "model": model,
-            "messages": messages,
-            "max_tokens": 4000,
-            "response_format": response_format
-        }
-        response = OPEN_AI_CLIENT.chat.completions.create(**request_payload)
-        questions = json.loads(response.choices[0].message.content)["questions"]
-    except:
-        messages = [
-            {"role": "user", "content": user_content}
-        ]
-        model_id = "anthropic.claude-3-sonnet-20240229-v1:0"
-        # model_id = "anthropic.claude-3-haiku-20240307-v1:0"
-        kwargs = {
-            "modelId": model_id,
-            "contentType": "application/json",
-            "accept": "application/json",
-            "body": json.dumps({
-                "anthropic_version": "bedrock-2023-05-31",
-                "max_tokens": 4000,
-                "system": sys_content,
-                "messages": messages
-            })
-        }
-        response = BEDROCK_CLIENT.invoke_model(**kwargs)
-        response_body = json.loads(response.get('body').read())
-        response_completion = response_body.get('content')[0].get('text')
-        questions = json.loads(response_completion)["questions"]
     print("=====json_response=====")
-    print(questions)
     print("=====json_response=====")
-    return questions
-def get_questions_answers(video_id, df_string, source="gcs"):
     if source == "gcs":
         try:
             print("===get_questions_answers on gcs===")
-            gcs_client = GCS_CLIENT
             bucket_name = 'video_ai_assistant'
             file_name = f'{video_id}_questions_answers.json'
             blob_name = f"{video_id}/{file_name}"
             # 检查檔案是否存在
             is_questions_answers_exists = GCS_SERVICE.check_file_exists(bucket_name, blob_name)
             if not is_questions_answers_exists:
-                questions_answers = generate_questions_answers(df_string)
                 questions_answers_text = json.dumps(questions_answers, ensure_ascii=False, indent=2)
                 GCS_SERVICE.upload_json_string(bucket_name, blob_name, questions_answers_text)
                 print("questions_answers已上傳到GCS")
@@ -1201,12 +1162,12 @@ def get_questions_answers(video_id, df_string, source="gcs"):
                 questions_answers = json.loads(questions_answers_text)
         except Exception as e:
             print(f"Error getting questions_answers: {str(e)}")
-            questions = get_questions(video_id, df_string, source)
-            questions_answers = [{"question": q, "answer": ""} for q in questions]
     return questions_answers
-def generate_questions_answers(df_string):
     print("===generate_questions_answers===")
     segments = split_data(df_string, word_base=100000)
     all_content = []
@@ -1232,7 +1193,7 @@ def generate_questions_answers(df_string):
             }}
         """
         response_format = { "type": "json_object" }
-        content = generate_content_by_LLM(sys_content, user_content, response_format)
         content_json = json.loads(content)["questions_answers"]
         all_content += content_json
@@ -1256,7 +1217,7 @@ def change_questions(password, df_string):
     print("=====get_questions=====")
     return q1, q2, q3
-def get_key_moments(video_id, formatted_simple_transcript, formatted_transcript, source):
     if source == "gcs":
         print("===get_key_moments on gcs===")
         gcs_client = GCS_CLIENT
@@ -1266,7 +1227,7 @@ def get_key_moments(video_id, formatted_simple_transcript, formatted_transcript,
         # 检查檔案是否存在
         is_key_moments_exists = GCS_SERVICE.check_file_exists(bucket_name, blob_name)
         if not is_key_moments_exists:
-            key_moments = generate_key_moments(formatted_simple_transcript, formatted_transcript)
             key_moments_json = {"key_moments": key_moments}
             key_moments_text = json.dumps(key_moments_json, ensure_ascii=False, indent=2)
             GCS_SERVICE.upload_json_string(bucket_name, blob_name, key_moments_text)
@@ -1282,7 +1243,7 @@ def get_key_moments(video_id, formatted_simple_transcript, formatted_transcript,
             for key_moment in key_moments_json["key_moments"]:
                 if "keywords" not in key_moment:
                     transcript = key_moment["transcript"]
-                    key_moment["keywords"] = generate_key_moments_keywords(transcript)
                     print("===keywords===")
                     print(key_moment["keywords"])
                     print("===keywords===")
@@ -1303,7 +1264,7 @@ def get_key_moments(video_id, formatted_simple_transcript, formatted_transcript,
         # 检查檔案是否存在
         exists, file_id = check_file_exists(service, folder_id, file_name)
         if not exists:
-            key_moments = generate_key_moments(formatted_simple_transcript, formatted_transcript)
             key_moments_json = {"key_moments": key_moments}
             key_moments_text = json.dumps(key_moments_json, ensure_ascii=False, indent=2)
             upload_content_directly(service, file_name, folder_id, key_moments_text)
@@ -1316,7 +1277,7 @@ def get_key_moments(video_id, formatted_simple_transcript, formatted_transcript,
     return key_moments_json
-def generate_key_moments(formatted_simple_transcript, formatted_transcript):
     print("===generate_key_moments===")
     segments = split_data(formatted_simple_transcript, word_base=100000)
     all_content = []
@@ -1343,7 +1304,7 @@ def generate_key_moments(formatted_simple_transcript, formatted_transcript):
             }}
         """
         response_format = { "type": "json_object" }
-        content = generate_content_by_LLM(sys_content, user_content, response_format)
         key_moments = json.loads(content)["key_moments"]
         # "transcript": get text from formatted_simple_transcript
@@ -1371,7 +1332,7 @@ def generate_key_moments(formatted_simple_transcript, formatted_transcript):
     return all_content
-def generate_key_moments_keywords(transcript):
     print("===generate_key_moments_keywords===")
     segments = split_data(transcript, word_base=100000)
     all_content = []
@@ -1384,7 +1345,7 @@ def generate_key_moments_keywords(transcript):
             不用給上下文，直接給出關鍵字，使用 zh-TW，用逗號分隔， example: 關鍵字1, 關鍵字2
             transcript：{segment}
         """
-        content = generate_content_by_LLM(sys_content, user_content)
         keywords = content.strip().split(",")
         all_content += keywords
@@ -1665,7 +1626,6 @@ def delete_LLM_content(video_id, kind):
 def update_LLM_content(video_id, new_content, kind):
     print(f"===upfdate kind on gcs===")
-    gcs_client = GCS_CLIENT
     bucket_name = 'video_ai_assistant'
     file_name = f'{video_id}_{kind}.json'
     blob_name = f"{video_id}/{file_name}"
@@ -1739,16 +1699,16 @@ def update_LLM_content(video_id, new_content, kind):
     print(f"{kind} 已更新到GCS")
     return gr.update(value=updated_content, interactive=False)
-def create_LLM_content(video_id, df_string, kind):
     print(f"===create_{kind}===")
     print(f"video_id: {video_id}")
     if kind == "reading_passage_latex":
-        content = generate_reading_passage(df_string)
         update_LLM_content(video_id, content, kind)
     elif kind == "summary_markdown":
         meta_data = get_meta_data(video_id)
-        content = generate_summarise(df_string, meta_data)
         update_LLM_content(video_id, content, kind)
     elif kind == "mind_map":
         content = generate_mind_map(df_string)
@@ -1760,7 +1720,7 @@ def create_LLM_content(video_id, df_string, kind):
             transcript = df_string
         formatted_simple_transcript = create_formatted_simple_transcript(transcript)
         formatted_transcript = create_formatted_transcript(video_id, transcript)
-        gen_content = generate_key_moments(formatted_simple_transcript, formatted_transcript)
         update_LLM_content(video_id, gen_content, kind)
         content = json.dumps(gen_content, ensure_ascii=False, indent=2)
     elif kind == "transcript":
@@ -1768,7 +1728,7 @@ def create_LLM_content(video_id, df_string, kind):
         update_LLM_content(video_id, gen_content, kind)
         content = json.dumps(gen_content, ensure_ascii=False, indent=2)
     elif kind == "questions":
-        gen_content = generate_questions(df_string)
         update_LLM_content(video_id, gen_content, kind)
         content = json.dumps(gen_content, ensure_ascii=False, indent=2)
     elif kind == "questions_answers":
@@ -1777,7 +1737,7 @@ def create_LLM_content(video_id, df_string, kind):
         else:
             transcript = df_string
         formatted_simple_transcript = create_formatted_simple_transcript(transcript)
-        gen_content = generate_questions_answers(formatted_simple_transcript)
         update_LLM_content(video_id, gen_content, kind)
         content = json.dumps(gen_content, ensure_ascii=False, indent=2)
@@ -2690,14 +2650,20 @@ HEAD = """
 with gr.Blocks(theme=gr.themes.Base(primary_hue=gr.themes.colors.orange, secondary_hue=gr.themes.colors.amber, text_size = gr.themes.sizes.text_lg), head=HEAD) as demo:
     with gr.Row() as admin:
-        password = gr.Textbox(label="Password", type="password", elem_id="password_input", visible=True)
-        youtube_link = gr.Textbox(label="Enter YouTube Link", elem_id="youtube_link_input", visible=True)
-        video_id = gr.Textbox(label="video_id", visible=True)
-        # file_upload = gr.File(label="Upload your CSV or Word file", visible=False)
-        # web_link = gr.Textbox(label="Enter Web Page Link", visible=False)
-        user_data = gr.Textbox(label="User Data", elem_id="user_data_input", visible=True)
-        youtube_link_btn = gr.Button("Submit_YouTube_Link", elem_id="youtube_link_btn", visible=True)
-        is_env_prod = gr.Checkbox(value=False, label="is_env_prod")
     with gr.Row() as data_state:
         content_subject_state = gr.State()  # 使用 gr.State 存储 content_subject
         content_grade_state = gr.State()  # 使用 gr.State 存储 content_grade
@@ -3170,7 +3136,7 @@ with gr.Blocks(theme=gr.themes.Base(primary_hue=gr.themes.colors.orange, seconda
     )
     # 当输入 YouTube 链接时触发
-    process_youtube_link_inputs = [password, youtube_link]
     process_youtube_link_outputs = [
         video_id,
         questions_answers_json,
@@ -3251,7 +3217,7 @@ with gr.Blocks(theme=gr.themes.Base(primary_hue=gr.themes.colors.orange, seconda
         {
             'button': transcript_create_button,
             'action': create_LLM_content,
-            'inputs': [video_id, df_string_output, transcript_kind],
             'outputs': [df_string_output]
         },
         {
@@ -3282,7 +3248,7 @@ with gr.Blocks(theme=gr.themes.Base(primary_hue=gr.themes.colors.orange, seconda
         {
             'button': reading_passage_create_button,
             'action': create_LLM_content,
-            'inputs': [video_id, df_string_output, reading_passage_kind],
             'outputs': [reading_passage_text]
         },
         {
@@ -3313,7 +3279,7 @@ with gr.Blocks(theme=gr.themes.Base(primary_hue=gr.themes.colors.orange, seconda
         {
             'button': summary_create_button,
             'action': create_LLM_content,
-            'inputs': [video_id, df_string_output, summary_kind],
             'outputs': [summary_text]
         },
         {
@@ -3344,7 +3310,7 @@ with gr.Blocks(theme=gr.themes.Base(primary_hue=gr.themes.colors.orange, seconda
         {
             'button': key_moments_create_button,
             'action': create_LLM_content,
-            'inputs': [video_id, df_string_output, key_moments_kind],
             'outputs': [key_moments]
         },
         {
@@ -3375,7 +3341,7 @@ with gr.Blocks(theme=gr.themes.Base(primary_hue=gr.themes.colors.orange, seconda
         {
             'button': questions_create_button,
             'action': create_LLM_content,
-            'inputs': [video_id, df_string_output, questions_kind],
             'outputs': [questions_json]
         },
         {
@@ -3406,7 +3372,7 @@ with gr.Blocks(theme=gr.themes.Base(primary_hue=gr.themes.colors.orange, seconda
         {
             'button': questions_answers_create_button,
             'action': create_LLM_content,
-            'inputs': [video_id, df_string_output, questions_answers_kind],
             'outputs': [questions_answers_json]
         },
         {
@@ -3437,7 +3403,7 @@ with gr.Blocks(theme=gr.themes.Base(primary_hue=gr.themes.colors.orange, seconda
         {
             'button': worksheet_create_button,
             'action': create_LLM_content,
-            'inputs': [video_id, df_string_output, worksheet_kind],
             'outputs': [worksheet_json]
         },
         {

     GCS_SERVICE.upload_json_string(bucket_name, transcript_blob_name, transcript_text)
     print("Transcript uploaded successfully.")
+def process_youtube_link(password, link, LLM_model=None):
     verify_password(password)
     video_id = extract_youtube_id(link)
     # 基于逐字稿生成其他所需的输出
     source = "gcs"
+    questions_answers = get_questions_answers(video_id, formatted_simple_transcript, source, LLM_model)
     questions_answers_json = json.dumps(questions_answers, ensure_ascii=False, indent=2)
+    summary_json = get_video_id_summary(video_id, formatted_simple_transcript, source, LLM_model)
     summary_text = summary_json["summary"]
     summary = summary_json["summary"]
+    key_moments_json = get_key_moments(video_id, formatted_simple_transcript, formatted_transcript, source, LLM_model)
     key_moments = key_moments_json["key_moments"]
     key_moments_text = json.dumps(key_moments, ensure_ascii=False, indent=2)
     key_moments_html = get_key_moments_html(key_moments)
     html_content = format_transcript_to_html(formatted_transcript)
     simple_html_content = format_simple_transcript_to_html(formatted_simple_transcript)
+    mind_map_json = get_mind_map(video_id, formatted_simple_transcript, source, LLM_model)
     mind_map = mind_map_json["mind_map"]
     mind_map_html = get_mind_map_html(mind_map)
+    reading_passage_json = get_reading_passage(video_id, formatted_simple_transcript, source, LLM_model)
     reading_passage_text = reading_passage_json["reading_passage"]
     reading_passage = reading_passage_json["reading_passage"]
     meta_data = get_meta_data(video_id)
     return segments
def generate_content_by_open_ai(sys_content, user_content, response_format=None):
    """Send a system/user prompt pair to OpenAI chat completions and return the reply.

    Args:
        sys_content: Text placed in the "system" message.
        user_content: Text placed in the "user" message.
        response_format: Optional value forwarded as the ``response_format``
            request field. The field is left out of the request entirely
            when this is None.

    Returns:
        The message content of the first returned choice, with leading and
        trailing whitespace stripped.
    """
    print("LLM using OPEN AI")
    chat_request = dict(
        model="gpt-4-turbo",
        messages=[
            {"role": "system", "content": sys_content},
            {"role": "user", "content": user_content},
        ],
        max_tokens=4000,
    )
    # Attach response_format only when the caller explicitly asked for one.
    if response_format is not None:
        chat_request.update(response_format=response_format)
    completion = OPEN_AI_CLIENT.chat.completions.create(**chat_request)
    first_choice = completion.choices[0]
    return first_choice.message.content.strip()
def generate_content_by_bedrock(sys_content, user_content):
    """Generate text with Claude 3 Sonnet through the Bedrock client.

    Args:
        sys_content: Text sent as the request's ``system`` field.
        user_content: Text sent as the single "user" message. A fixed hint
            about quoting inside JSON values is appended to it on every call,
            whether or not the prompt asks for JSON.

    Returns:
        The ``text`` field of the first element of the response body's
        ``content`` list.
    """
    # Log message previously read "REDROCK" (typo for the Bedrock backend).
    print("LLM using BEDROCK")
    # The appended hint asks the model to use single quotes or escaped double
    # quotes inside JSON values, to avoid JSON decoder errors downstream.
    messages = [
        {"role": "user", "content": user_content +"(如果是 JSON 格式，value 的引號，請用單引號，或是用反斜線＋雙引號，避免 JSON Decoder error )"}
    ]
    model_id = "anthropic.claude-3-sonnet-20240229-v1:0"
    # Alternative model id kept for reference:
    # model_id = "anthropic.claude-3-haiku-20240307-v1:0"
    kwargs = {
        "modelId": model_id,
        "contentType": "application/json",
        "accept": "application/json",
        "body": json.dumps({
            "anthropic_version": "bedrock-2023-05-31",
            "max_tokens": 4000,
            "system": sys_content,
            "messages": messages
        })
    }
    response = BEDROCK_CLIENT.invoke_model(**kwargs)
    # The response body is a readable stream holding a JSON document.
    response_body = json.loads(response.get('body').read())
    content = response_body.get('content')[0].get('text')
    return content
def generate_content_by_LLM(sys_content, user_content, response_format=None, LLM_model=None):
    """Route a prompt to the selected LLM backend and return the generated text.

    Args:
        sys_content: System prompt passed through to the backend helper.
        user_content: User prompt passed through to the backend helper.
        response_format: Forwarded to the OpenAI helper only; the Bedrock
            helper is called without it.
        LLM_model: ``"anthropic-claude-3-sonnet"`` selects the Bedrock helper;
            any other value (including None) selects the OpenAI helper.

    Returns:
        Whatever the chosen backend helper returned. It is also echoed to
        stdout between two marker lines.
    """
    print(f"LLM: {LLM_model}")
    use_bedrock = LLM_model == "anthropic-claude-3-sonnet"
    if not use_bedrock:
        generated = generate_content_by_open_ai(sys_content, user_content, response_format)
    else:
        generated = generate_content_by_bedrock(sys_content, user_content)
    # Echo the raw model output between marker lines.
    for line in ("=====content=====", generated, "=====content====="):
        print(line)
    return generated
+def get_reading_passage(video_id, df_string, source, LLM_model=None):
     if source == "gcs":
         print("===get_reading_passage on gcs===")
         bucket_name = 'video_ai_assistant'
         file_name = f'{video_id}_reading_passage_latex.json'
         blob_name = f"{video_id}/{file_name}"
         # 检查 reading_passage 是否存在
         is_file_exists = GCS_SERVICE.check_file_exists(bucket_name, blob_name)
         if not is_file_exists:
+            reading_passage = generate_reading_passage(df_string, LLM_model)
             reading_passage_json = {"reading_passage": str(reading_passage)}
             reading_passage_text = json.dumps(reading_passage_json, ensure_ascii=False, indent=2)
             GCS_SERVICE.upload_json_string(bucket_name, blob_name, reading_passage_text)
     return reading_passage_json
+def generate_reading_passage(df_string, LLM_model=None):
     print("===generate_reading_passage===")
     segments = split_data(df_string, word_base=100000)
     all_content = []
           加減乘除、根號、次方等等的運算式口語也換成 LATEX 數學符號
           請直接給出文章，不用介紹怎麼處理的或是文章字數等等
       """
+        content = generate_content_by_LLM(sys_content, user_content, response_format=None, LLM_model=LLM_model)
         all_content.append(content + "\n")
     # 將所有生成的閱讀理解段落合併成一個完整的文章
     tts.save(filename)
     return filename
+def get_mind_map(video_id, df_string, source, LLM_model=None):
     if source == "gcs":
         print("===get_mind_map on gcs===")
         gcs_client = GCS_CLIENT
         # 检查檔案是否存在
         is_file_exists = GCS_SERVICE.check_file_exists(bucket_name, blob_name)
         if not is_file_exists:
+            mind_map = generate_mind_map(df_string, LLM_model)
             mind_map_json = {"mind_map": str(mind_map)}
             mind_map_text = json.dumps(mind_map_json, ensure_ascii=False, indent=2)
             GCS_SERVICE.upload_json_string(bucket_name, blob_name, mind_map_text)
         # 检查檔案是否存在
         exists, file_id = check_file_exists(service, folder_id, file_name)
         if not exists:
+            mind_map = generate_mind_map(df_string, LLM_model)
             mind_map_json = {"mind_map": str(mind_map)}
             mind_map_text = json.dumps(mind_map_json, ensure_ascii=False, indent=2)
             upload_content_directly(service, file_name, folder_id, mind_map_text)
     return mind_map_json
+def generate_mind_map(df_string, LLM_model=None):
     print("===generate_mind_map===")
     segments = split_data(df_string, word_base=100000)
     all_content = []
             注意：不需要前後文敘述，直接給出 markdown 文本即可
             這對我很重要
         """
+        content = generate_content_by_LLM(sys_content, user_content, response_format=None, LLM_model=LLM_model)
         all_content.append(content + "\n")
     # 將所有生成的閱讀理解段落合併成一個完整的文章
     """
     return mind_map_html
+def get_video_id_summary(video_id, df_string, source, LLM_model=None):
     if source == "gcs":
         print("===get_video_id_summary on gcs===")
         bucket_name = 'video_ai_assistant'
         file_name = f'{video_id}_summary_markdown.json'
         summary_file_blob_name = f"{video_id}/{file_name}"
         is_summary_file_exists = GCS_SERVICE.check_file_exists(bucket_name, summary_file_blob_name)
         if not is_summary_file_exists:
             meta_data = get_meta_data(video_id)
+            summary = generate_summarise(df_string, meta_data, LLM_model)
             summary_json = {"summary": str(summary)}
             summary_text = json.dumps(summary_json, ensure_ascii=False, indent=2)
             GCS_SERVICE.upload_json_string(bucket_name, summary_file_blob_name, summary_text)
         exists, file_id = check_file_exists(service, folder_id, file_name)
         if not exists:
             meta_data = get_meta_data(video_id)
+            summary = generate_summarise(df_string, meta_data, LLM_model)
             summary_json = {"summary": str(summary)}
             summary_text = json.dumps(summary_json, ensure_ascii=False, indent=2)
     return summary_json
+def generate_summarise(df_string, metadata=None, LLM_model=None):
     # Generate a summary through generate_content_by_LLM and return it.
     # Partial results are collected in `all_content`; when there is more than
     # one, a further LLM call produces the final text, otherwise the single
     # entry is returned as-is.
     # NOTE(review): the prompt construction and the loop header are elided in
     # this excerpt; the indented "##"/"-" lines below are prompt-string content.
     print("===generate_summarise===")
     # Use OpenAI to generate questions based on the uploaded data
     if metadata:
             ## ❓ 延伸小問題
                 - (一個 bullet point....請圍繞「課程名稱」為學習重點，進行重點整理，不要整理跟情境故事相關的問題)
         """
         # Plain-text response (no response_format); a newline is appended
         # before the result is stored.
+        content = generate_content_by_LLM(sys_content, user_content, response_format=None, LLM_model=LLM_model)
         all_content.append(content + "\n")
     # More than one partial result: run one more LLM call for the final text.
     if len(all_content) > 1:
             ## ❓ 延伸小問題
                 - ( {all_content_cnt}  個 bullet point....請圍繞「課程名稱」為學習重點，進行重點整理，不要整理跟情境故事相關的問題)
         """
+        final_content = generate_content_by_LLM(sys_content, user_content, response_format=None, LLM_model=LLM_model)
     else:
         # NOTE(review): assumes all_content is non-empty here - confirm.
         final_content = all_content[0]
     return final_content
+def get_questions(video_id, df_string, source="gcs", LLM_model=None):
     # Return three questions (q1, q2, q3) for `video_id`. When no stored
     # questions file is found, they are generated from `df_string` with
     # generate_questions (LLM_model forwarded) and uploaded.
     # NOTE(review): diff excerpt - the definitions of bucket_name/blob_name,
     # the file-exists path, the second branch header and the q1..q3
     # assignments are not visible here.
     if source == "gcs":
         # Check GCS for video_id_questions.json
         print("===get_questions on gcs===")
         # Check whether the file exists
         is_questions_exists = GCS_SERVICE.check_file_exists(bucket_name, blob_name)
         if not is_questions_exists:
+            questions = generate_questions(df_string, LLM_model)
             questions_text = json.dumps(questions, ensure_ascii=False, indent=2)
             GCS_SERVICE.upload_json_string(bucket_name, blob_name, questions_text)
             print("questions已上傳到GCS")
         # Check whether the file exists
         # (this path uploads to Google Drive, per the log message below)
         exists, file_id = check_file_exists(service, folder_id, file_name)
         if not exists:
+            questions = generate_questions(df_string, LLM_model)
             questions_text = json.dumps(questions, ensure_ascii=False, indent=2)
             upload_content_directly(service, file_name, folder_id, questions_text)
             print("questions已上傳到Google Drive")
     print("=====get_questions=====")
     return q1, q2, q3
+def generate_questions(df_string, LLM_model=None):
     # Ask the LLM for questions about `df_string` and return the list stored
     # under the "questions" key of its JSON reply.
     # NOTE(review): diff excerpt - construction of sys_content/user_content is
     # elided; the bracketed line and "}}" below are prompt-string content.
     print("===generate_questions===")
     # Use OpenAI to generate questions based on the uploaded data
     if isinstance(df_string, str):
             [q1的敘述text, q2的敘述text, q3的敘述text]
         }}
     """
     # Request a JSON object so the reply can be parsed with json.loads.
+    response_format = { "type": "json_object" }
+    questions = generate_content_by_LLM(sys_content, user_content, response_format, LLM_model)
     # Raises if the reply is not valid JSON or lacks the "questions" key.
+    questions_list = json.loads(questions)["questions"]
     print("=====json_response=====")
+    print(questions_list)
     print("=====json_response=====")
+    return questions_list
+def get_questions_answers(video_id, df_string, source="gcs", LLM_model=None):
     # Return the question/answer list for `video_id`. When no stored file is
     # found in GCS, it is generated from `df_string` with
     # generate_questions_answers (LLM_model forwarded) and uploaded.
     # On any exception the function falls back to get_questions and pairs
     # each question with an empty answer.
     if source == "gcs":
         try:
             print("===get_questions_answers on gcs===")
             # Blob path is "<video_id>/<video_id>_questions_answers.json".
             bucket_name = 'video_ai_assistant'
             file_name = f'{video_id}_questions_answers.json'
             blob_name = f"{video_id}/{file_name}"
             # Check whether the file exists
             is_questions_answers_exists = GCS_SERVICE.check_file_exists(bucket_name, blob_name)
             if not is_questions_answers_exists:
+                questions_answers = generate_questions_answers(df_string, LLM_model)
                 questions_answers_text = json.dumps(questions_answers, ensure_ascii=False, indent=2)
                 GCS_SERVICE.upload_json_string(bucket_name, blob_name, questions_answers_text)
                 print("questions_answers已上傳到GCS")
                 # NOTE(review): diff excerpt - lines may be elided just above;
                 # confirm which branch the json.loads below belongs to.
                 questions_answers = json.loads(questions_answers_text)
         except Exception as e:
             print(f"Error getting questions_answers: {str(e)}")
             # Fallback: keep the questions, leave every answer blank.
+            questions_list = get_questions(video_id, df_string, source, LLM_model)
+            questions_answers = [{"question": q, "answer": ""} for q in questions_list]
     return questions_answers
+def generate_questions_answers(df_string, LLM_model=None):
     # Split `df_string` with split_data and, for the visible LLM call, extend
     # `all_content` with the list found under "questions_answers" in the
     # JSON reply.
     # NOTE(review): diff excerpt - the loop header, prompt construction and
     # this function's own return are not visible; "}}" below is prompt text.
     print("===generate_questions_answers===")
     segments = split_data(df_string, word_base=100000)
     all_content = []
             }}
         """
         # Request a JSON object so the reply can be parsed with json.loads.
         response_format = { "type": "json_object" }
+        content = generate_content_by_LLM(sys_content, user_content, response_format, LLM_model)
         content_json = json.loads(content)["questions_answers"]
         all_content += content_json
     # NOTE(review): the two lines below match the tail of get_questions and
     # are presumably context from a different function in the diff - confirm.
     print("=====get_questions=====")
     return q1, q2, q3
+def get_key_moments(video_id, formatted_simple_transcript, formatted_transcript, source, LLM_model=None):
     # Return the {"key_moments": [...]} dict for `video_id`. When no stored
     # file is found, key moments are generated with generate_key_moments
     # (LLM_model forwarded) and uploaded.
     # NOTE(review): diff excerpt - bucket_name/blob_name, the file-exists path
     # and the second branch header are not visible here.
     if source == "gcs":
         print("===get_key_moments on gcs===")
         gcs_client = GCS_CLIENT
         # Check whether the file exists
         is_key_moments_exists = GCS_SERVICE.check_file_exists(bucket_name, blob_name)
         if not is_key_moments_exists:
+            key_moments = generate_key_moments(formatted_simple_transcript, formatted_transcript, LLM_model)
             key_moments_json = {"key_moments": key_moments}
             key_moments_text = json.dumps(key_moments_json, ensure_ascii=False, indent=2)
             GCS_SERVICE.upload_json_string(bucket_name, blob_name, key_moments_text)
             # Fill in "keywords" for entries lacking them, derived from each
             # entry's own transcript text.
             # NOTE(review): in this excerpt the loop follows the upload, so the
             # added keywords would not be in the uploaded text - lines may be
             # elided in between; confirm.
             for key_moment in key_moments_json["key_moments"]:
                 if "keywords" not in key_moment:
                     transcript = key_moment["transcript"]
+                    key_moment["keywords"] = generate_key_moments_keywords(transcript, LLM_model)
                     print("===keywords===")
                     print(key_moment["keywords"])
                     print("===keywords===")
         # Check whether the file exists
         exists, file_id = check_file_exists(service, folder_id, file_name)
         if not exists:
+            key_moments = generate_key_moments(formatted_simple_transcript, formatted_transcript, LLM_model)
             key_moments_json = {"key_moments": key_moments}
             key_moments_text = json.dumps(key_moments_json, ensure_ascii=False, indent=2)
             upload_content_directly(service, file_name, folder_id, key_moments_text)
     return key_moments_json
+def generate_key_moments(formatted_simple_transcript, formatted_transcript, LLM_model=None):
     # Split the simple transcript with split_data, ask the LLM for a JSON
     # reply, read its "key_moments" list, and return `all_content`.
     # NOTE(review): diff excerpt - the loop header, prompt construction and
     # the code that fills `all_content` are not visible; "}}" is prompt text.
     print("===generate_key_moments===")
     segments = split_data(formatted_simple_transcript, word_base=100000)
     all_content = []
             }}
         """
         # Request a JSON object so the reply can be parsed with json.loads.
         response_format = { "type": "json_object" }
+        content = generate_content_by_LLM(sys_content, user_content, response_format, LLM_model)
         key_moments = json.loads(content)["key_moments"]
         # "transcript": get text from formatted_simple_transcript
     return all_content
+def generate_key_moments_keywords(transcript, LLM_model=None):
     # Split `transcript` with split_data and collect keywords from the LLM's
     # plain-text reply into `all_content`.
     # NOTE(review): diff excerpt - the loop header, the start of the prompt
     # and the return statement are not visible here; the two non-code lines
     # below are prompt-string content.
     print("===generate_key_moments_keywords===")
     segments = split_data(transcript, word_base=100000)
     all_content = []
             不用給上下文，直接給出關鍵字，使用 zh-TW，用逗號分隔， example: 關鍵字1, 關鍵字2
             transcript：{segment}
         """
+        content = generate_content_by_LLM(sys_content, user_content, response_format=None, LLM_model=LLM_model)
         # The reply is split on ASCII commas only; individual items are not
         # stripped, so surrounding spaces are kept.
         keywords = content.strip().split(",")
         all_content += keywords
 def update_LLM_content(video_id, new_content, kind):
     # Store updated content of the given `kind` for `video_id` and return a
     # gr.update that sets the component value and makes it non-interactive.
     # Blob path is "<video_id>/<video_id>_<kind>.json".
     # NOTE(review): diff excerpt - the per-kind handling that builds
     # `updated_content` and performs the upload is not visible here.
     print(f"===upfdate kind on gcs===")
     bucket_name = 'video_ai_assistant'
     file_name = f'{video_id}_{kind}.json'
     blob_name = f"{video_id}/{file_name}"
     print(f"{kind} 已更新到GCS")
     return gr.update(value=updated_content, interactive=False)
+def create_LLM_content(video_id, df_string, kind, LLM_model=None):
     # Regenerate one kind of derived content for `video_id` from `df_string`,
     # persist it through update_LLM_content, and keep the result in `content`.
     # `kind` selects the generator; LLM_model is forwarded to the generators
     # shown below.
     # NOTE(review): diff excerpt - several branch bodies and the final return
     # are partly elided.
     print(f"===create_{kind}===")
     print(f"video_id: {video_id}")
     if kind == "reading_passage_latex":
+        content = generate_reading_passage(df_string, LLM_model)
         update_LLM_content(video_id, content, kind)
     elif kind == "summary_markdown":
         meta_data = get_meta_data(video_id)
+        content = generate_summarise(df_string, meta_data, LLM_model)
         update_LLM_content(video_id, content, kind)
     elif kind == "mind_map":
         # NOTE(review): unlike the other generate_* calls visible in this
         # function, LLM_model is not passed here - confirm whether intended.
         content = generate_mind_map(df_string)
             transcript = df_string
         formatted_simple_transcript = create_formatted_simple_transcript(transcript)
         formatted_transcript = create_formatted_transcript(video_id, transcript)
+        gen_content = generate_key_moments(formatted_simple_transcript, formatted_transcript, LLM_model)
         update_LLM_content(video_id, gen_content, kind)
         # Structured results are serialized to pretty-printed JSON text.
         content = json.dumps(gen_content, ensure_ascii=False, indent=2)
     elif kind == "transcript":
         update_LLM_content(video_id, gen_content, kind)
         content = json.dumps(gen_content, ensure_ascii=False, indent=2)
     elif kind == "questions":
+        gen_content = generate_questions(df_string, LLM_model)
         update_LLM_content(video_id, gen_content, kind)
         content = json.dumps(gen_content, ensure_ascii=False, indent=2)
     elif kind == "questions_answers":
         else:
             transcript = df_string
         formatted_simple_transcript = create_formatted_simple_transcript(transcript)
+        gen_content = generate_questions_answers(formatted_simple_transcript, LLM_model)
         update_LLM_content(video_id, gen_content, kind)
         content = json.dumps(gen_content, ensure_ascii=False, indent=2)
 with gr.Blocks(theme=gr.themes.Base(primary_hue=gr.themes.colors.orange, secondary_hue=gr.themes.colors.amber, text_size = gr.themes.sizes.text_lg), head=HEAD) as demo:
     with gr.Row() as admin:
+        with gr.Column(scale=4):
+            with gr.Row():
+                password = gr.Textbox(label="Password", type="password", elem_id="password_input", visible=True)
+                youtube_link = gr.Textbox(label="Enter YouTube Link", elem_id="youtube_link_input", visible=True)
+                video_id = gr.Textbox(label="video_id", visible=True)
+                # file_upload = gr.File(label="Upload your CSV or Word file", visible=False)
+                # web_link = gr.Textbox(label="Enter Web Page Link", visible=False)
+                user_data = gr.Textbox(label="User Data", elem_id="user_data_input", visible=True)
+            with gr.Row():
+                is_env_prod = gr.Checkbox(value=False, label="is_env_prod")
+                LLM_model = gr.Dropdown(label="LLM Model", choices=["open-ai-gpt-4", "anthropic-claude-3-sonnet"], value="open-ai-gpt-4", visible=True, interactive=True)
+        with gr.Column(scale=1):
+            with gr.Row():
+                youtube_link_btn = gr.Button("Submit_YouTube_Link", elem_id="youtube_link_btn", visible=True)
     with gr.Row() as data_state:
         content_subject_state = gr.State()  # 使用 gr.State 存储 content_subject
         content_grade_state = gr.State()  # 使用 gr.State 存储 content_grade
     )
     # 当输入 YouTube 链接时触发
+    process_youtube_link_inputs = [password, youtube_link, LLM_model]
     process_youtube_link_outputs = [
         video_id,
         questions_answers_json,
         {
             'button': transcript_create_button,
             'action': create_LLM_content,
+            'inputs': [video_id, df_string_output, transcript_kind, LLM_model],
             'outputs': [df_string_output]
         },
         {
         {
             'button': reading_passage_create_button,
             'action': create_LLM_content,
+            'inputs': [video_id, df_string_output, reading_passage_kind, LLM_model],
             'outputs': [reading_passage_text]
         },
         {
         {
             'button': summary_create_button,
             'action': create_LLM_content,
+            'inputs': [video_id, df_string_output, summary_kind, LLM_model],
             'outputs': [summary_text]
         },
         {
         {
             'button': key_moments_create_button,
             'action': create_LLM_content,
+            'inputs': [video_id, df_string_output, key_moments_kind, LLM_model],
             'outputs': [key_moments]
         },
         {
         {
             'button': questions_create_button,
             'action': create_LLM_content,
+            'inputs': [video_id, df_string_output, questions_kind, LLM_model],
             'outputs': [questions_json]
         },
         {
         {
             'button': questions_answers_create_button,
             'action': create_LLM_content,
+            'inputs': [video_id, df_string_output, questions_answers_kind, LLM_model],
             'outputs': [questions_answers_json]
         },
         {
         {
             'button': worksheet_create_button,
             'action': create_LLM_content,
+            'inputs': [video_id, df_string_output, worksheet_kind, LLM_model],
             'outputs': [worksheet_json]
         },
         {