youngtsai committed on
Commit
9b92004
·
1 Parent(s): 41af362

transcript admin

Browse files
Files changed (1) hide show
  1. app.py +57 -13
app.py CHANGED
@@ -492,6 +492,7 @@ def process_transcript_and_screenshots_on_gcs(video_id):
492
  is_new_transcript = False
493
  is_transcript_exists = GCS_SERVICE.check_file_exists(bucket_name, transcript_blob_name)
494
  if not is_transcript_exists:
 
495
  # 从YouTube获取逐字稿并上传
496
  try:
497
  transcript = get_transcript(video_id)
@@ -571,7 +572,6 @@ def process_youtube_link(password, link):
571
  VIDEO_ID = video_id
572
 
573
  try:
574
- # transcript = process_transcript_and_screenshots(video_id)
575
  transcript = process_transcript_and_screenshots_on_gcs(video_id)
576
  except Exception as e:
577
  error_msg = f" {video_id} 逐字稿錯誤: {str(e)}"
@@ -579,17 +579,14 @@ def process_youtube_link(password, link):
579
  print(error_msg)
580
  raise gr.Error(error_msg)
581
 
 
582
  formatted_transcript = []
583
  formatted_simple_transcript =[]
584
- screenshot_paths = []
585
  for entry in transcript:
586
  start_time = format_seconds_to_time(entry['start'])
587
  end_time = format_seconds_to_time(entry['start'] + entry['duration'])
588
  embed_url = get_embedded_youtube_link(video_id, entry['start'])
589
  img_file_id = entry['img_file_id']
590
- # img_file_id =""
591
- # 先取消 Google Drive 的图片
592
- # screenshot_path = f"https://lh3.googleusercontent.com/d/{img_file_id}=s4000"
593
  screenshot_path = img_file_id
594
  line = {
595
  "start_time": start_time,
@@ -606,7 +603,6 @@ def process_youtube_link(password, link):
606
  "text": entry['text']
607
  }
608
  formatted_simple_transcript.append(simple_line)
609
- screenshot_paths.append(screenshot_path)
610
 
611
  global TRANSCRIPTS
612
  TRANSCRIPTS = formatted_transcript
@@ -639,7 +635,7 @@ def process_youtube_link(password, link):
639
  questions[0] if len(questions) > 0 else "", \
640
  questions[1] if len(questions) > 1 else "", \
641
  questions[2] if len(questions) > 2 else "", \
642
- formatted_transcript_json, \
643
  summary, \
644
  key_moments_html, \
645
  mind_map, \
@@ -1380,11 +1376,11 @@ def delete_LLM_content(video_id, kind):
1380
  bucket_name = 'video_ai_assistant'
1381
  file_name = f'{video_id}_{kind}.json'
1382
  blob_name = f"{video_id}/{file_name}"
1383
- # 检查 reading_passage 是否存在
1384
  is_file_exists = GCS_SERVICE.check_file_exists(bucket_name, blob_name)
1385
  if is_file_exists:
1386
  delete_blob(gcs_client, bucket_name, blob_name)
1387
- print("reading_passage已从GCS中删除")
1388
  return gr.update(value="", interactive=False)
1389
 
1390
  def update_LLM_content(video_id, new_content, kind):
@@ -1398,28 +1394,47 @@ def update_LLM_content(video_id, new_content, kind):
1398
  reading_passage_json = {"reading_passage": str(new_content)}
1399
  reading_passage_text = json.dumps(reading_passage_json, ensure_ascii=False, indent=2)
1400
  upload_file_to_gcs_with_json_string(gcs_client, bucket_name, blob_name, reading_passage_text)
 
1401
  elif kind == "summary":
1402
  summary_json = {"summary": str(new_content)}
1403
  summary_text = json.dumps(summary_json, ensure_ascii=False, indent=2)
1404
  upload_file_to_gcs_with_json_string(gcs_client, bucket_name, blob_name, summary_text)
 
1405
  elif kind == "mind_map":
1406
  mind_map_json = {"mind_map": str(new_content)}
1407
  mind_map_text = json.dumps(mind_map_json, ensure_ascii=False, indent=2)
1408
  upload_file_to_gcs_with_json_string(gcs_client, bucket_name, blob_name, mind_map_text)
 
 
 
 
 
 
 
 
 
1409
 
1410
  print(f"{kind} 已更新到GCS")
1411
- return gr.update(value=new_content, interactive=False)
1412
 
1413
  def create_LLM_content(video_id, df_string, kind):
1414
  print(f"===create_{kind}===")
 
 
1415
  if kind == "reading_passage":
1416
  content = generate_reading_passage(df_string)
 
1417
  elif kind == "summary":
1418
  content = generate_summarise(df_string)
 
1419
  elif kind == "mind_map":
1420
  content = generate_mind_map(df_string)
 
 
 
 
 
1421
 
1422
- update_LLM_content(video_id, content, kind)
1423
  return gr.update(value=content, interactive=False)
1424
 
1425
 
@@ -2016,7 +2031,7 @@ with gr.Blocks(theme=gr.themes.Base(primary_hue=gr.themes.colors.orange, seconda
2016
  with gr.Row() as admin:
2017
  password = gr.Textbox(label="Password", type="password", elem_id="password_input", visible=True)
2018
  youtube_link = gr.Textbox(label="Enter YouTube Link", elem_id="youtube_link_input", visible=True)
2019
- video_id = gr.Textbox(label="video_id", visible=False)
2020
  # file_upload = gr.File(label="Upload your CSV or Word file", visible=False)
2021
  # web_link = gr.Textbox(label="Enter Web Page Link", visible=False)
2022
  user_data = gr.Textbox(label="User Data", elem_id="user_data_input", visible=True)
@@ -2183,7 +2198,14 @@ with gr.Blocks(theme=gr.themes.Base(primary_hue=gr.themes.colors.orange, seconda
2183
 
2184
  with gr.Accordion("See Details", open=False) as see_details:
2185
  with gr.Tab("本文"):
2186
- df_string_output = gr.Textbox(lines=40, label="Data Text")
 
 
 
 
 
 
 
2187
  with gr.Tab("逐字稿"):
2188
  simple_html_content = gr.HTML(label="Simple Transcript")
2189
  with gr.Tab("圖文"):
@@ -2374,6 +2396,28 @@ with gr.Blocks(theme=gr.themes.Base(primary_hue=gr.themes.colors.orange, seconda
2374
  outputs=[df_summarise]
2375
  )
2376
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2377
  # 教師版
2378
  worksheet_content_btn.click(
2379
  get_ai_content,
 
492
  is_new_transcript = False
493
  is_transcript_exists = GCS_SERVICE.check_file_exists(bucket_name, transcript_blob_name)
494
  if not is_transcript_exists:
495
+ print("逐字稿文件不存在于GCS中,重新建立")
496
  # 从YouTube获取逐字稿并上传
497
  try:
498
  transcript = get_transcript(video_id)
 
572
  VIDEO_ID = video_id
573
 
574
  try:
 
575
  transcript = process_transcript_and_screenshots_on_gcs(video_id)
576
  except Exception as e:
577
  error_msg = f" {video_id} 逐字稿錯誤: {str(e)}"
 
579
  print(error_msg)
580
  raise gr.Error(error_msg)
581
 
582
+ original_transcript = json.dumps(transcript, ensure_ascii=False, indent=2)
583
  formatted_transcript = []
584
  formatted_simple_transcript =[]
 
585
  for entry in transcript:
586
  start_time = format_seconds_to_time(entry['start'])
587
  end_time = format_seconds_to_time(entry['start'] + entry['duration'])
588
  embed_url = get_embedded_youtube_link(video_id, entry['start'])
589
  img_file_id = entry['img_file_id']
 
 
 
590
  screenshot_path = img_file_id
591
  line = {
592
  "start_time": start_time,
 
603
  "text": entry['text']
604
  }
605
  formatted_simple_transcript.append(simple_line)
 
606
 
607
  global TRANSCRIPTS
608
  TRANSCRIPTS = formatted_transcript
 
635
  questions[0] if len(questions) > 0 else "", \
636
  questions[1] if len(questions) > 1 else "", \
637
  questions[2] if len(questions) > 2 else "", \
638
+ original_transcript, \
639
  summary, \
640
  key_moments_html, \
641
  mind_map, \
 
1376
  bucket_name = 'video_ai_assistant'
1377
  file_name = f'{video_id}_{kind}.json'
1378
  blob_name = f"{video_id}/{file_name}"
1379
+ # 检查 file 是否存在
1380
  is_file_exists = GCS_SERVICE.check_file_exists(bucket_name, blob_name)
1381
  if is_file_exists:
1382
  delete_blob(gcs_client, bucket_name, blob_name)
1383
+ print(f"{file_name}已从GCS中删除")
1384
  return gr.update(value="", interactive=False)
1385
 
1386
  def update_LLM_content(video_id, new_content, kind):
 
1394
  reading_passage_json = {"reading_passage": str(new_content)}
1395
  reading_passage_text = json.dumps(reading_passage_json, ensure_ascii=False, indent=2)
1396
  upload_file_to_gcs_with_json_string(gcs_client, bucket_name, blob_name, reading_passage_text)
1397
+ updated_content = reading_passage_text
1398
  elif kind == "summary":
1399
  summary_json = {"summary": str(new_content)}
1400
  summary_text = json.dumps(summary_json, ensure_ascii=False, indent=2)
1401
  upload_file_to_gcs_with_json_string(gcs_client, bucket_name, blob_name, summary_text)
1402
+ updated_content = summary_text
1403
  elif kind == "mind_map":
1404
  mind_map_json = {"mind_map": str(new_content)}
1405
  mind_map_text = json.dumps(mind_map_json, ensure_ascii=False, indent=2)
1406
  upload_file_to_gcs_with_json_string(gcs_client, bucket_name, blob_name, mind_map_text)
1407
+ updated_content = mind_map_text
1408
+ elif kind == "transcript":
1409
+ if isinstance(new_content, str):
1410
+ transcript_json = json.loads(new_content)
1411
+ else:
1412
+ transcript_json = new_content
1413
+ transcript_text = json.dumps(transcript_json, ensure_ascii=False, indent=2)
1414
+ upload_file_to_gcs_with_json_string(gcs_client, bucket_name, blob_name, transcript_text)
1415
+ updated_content = transcript_text
1416
 
1417
  print(f"{kind} 已更新到GCS")
1418
+ return gr.update(value=updated_content, interactive=False)
1419
 
1420
  def create_LLM_content(video_id, df_string, kind):
1421
  print(f"===create_{kind}===")
1422
+ print(f"video_id: {video_id}")
1423
+
1424
  if kind == "reading_passage":
1425
  content = generate_reading_passage(df_string)
1426
+ update_LLM_content(video_id, content, kind)
1427
  elif kind == "summary":
1428
  content = generate_summarise(df_string)
1429
+ update_LLM_content(video_id, content, kind)
1430
  elif kind == "mind_map":
1431
  content = generate_mind_map(df_string)
1432
+ update_LLM_content(video_id, content, kind)
1433
+ elif kind == "transcript":
1434
+ content = process_transcript_and_screenshots_on_gcs(video_id)
1435
+ update_LLM_content(video_id, content, kind)
1436
+ content = json.dumps(content, ensure_ascii=False, indent=2)
1437
 
 
1438
  return gr.update(value=content, interactive=False)
1439
 
1440
 
 
2031
  with gr.Row() as admin:
2032
  password = gr.Textbox(label="Password", type="password", elem_id="password_input", visible=True)
2033
  youtube_link = gr.Textbox(label="Enter YouTube Link", elem_id="youtube_link_input", visible=True)
2034
+ video_id = gr.Textbox(label="video_id", visible=True)
2035
  # file_upload = gr.File(label="Upload your CSV or Word file", visible=False)
2036
  # web_link = gr.Textbox(label="Enter Web Page Link", visible=False)
2037
  user_data = gr.Textbox(label="User Data", elem_id="user_data_input", visible=True)
 
2198
 
2199
  with gr.Accordion("See Details", open=False) as see_details:
2200
  with gr.Tab("本文"):
2201
+ with gr.Row() as transcript_admmin:
2202
+ transcript_kind = gr.Textbox(value="transcript", show_label=False)
2203
+ transcript_edit_button = gr.Button("編輯", size="sm", variant="primary")
2204
+ transcript_update_button = gr.Button("更新", size="sm", variant="primary")
2205
+ transcript_delete_button = gr.Button("刪除", size="sm", variant="primary")
2206
+ transcript_create_button = gr.Button("建立", size="sm", variant="primary")
2207
+ with gr.Row():
2208
+ df_string_output = gr.Textbox(lines=40, label="Data Text", )
2209
  with gr.Tab("逐字稿"):
2210
  simple_html_content = gr.HTML(label="Simple Transcript")
2211
  with gr.Tab("圖文"):
 
2396
  outputs=[df_summarise]
2397
  )
2398
 
2399
+ # transcript event
2400
+ transcript_create_button.click(
2401
+ create_LLM_content,
2402
+ inputs=[video_id, df_string_output, transcript_kind],
2403
+ outputs=[df_string_output]
2404
+ )
2405
+ transcript_delete_button.click(
2406
+ delete_LLM_content,
2407
+ inputs=[video_id, transcript_kind],
2408
+ outputs=[df_string_output]
2409
+ )
2410
+ transcript_edit_button.click(
2411
+ enable_edit_mode,
2412
+ inputs=[],
2413
+ outputs=[df_string_output]
2414
+ )
2415
+ transcript_update_button.click(
2416
+ update_LLM_content,
2417
+ inputs=[video_id, df_string_output, transcript_kind],
2418
+ outputs=[df_string_output]
2419
+ )
2420
+
2421
  # 教師版
2422
  worksheet_content_btn.click(
2423
  get_ai_content,