Spaces:
Running
Running
transcript admin
Browse files
app.py
CHANGED
@@ -492,6 +492,7 @@ def process_transcript_and_screenshots_on_gcs(video_id):
|
|
492 |
is_new_transcript = False
|
493 |
is_transcript_exists = GCS_SERVICE.check_file_exists(bucket_name, transcript_blob_name)
|
494 |
if not is_transcript_exists:
|
|
|
495 |
# 从YouTube获取逐字稿并上传
|
496 |
try:
|
497 |
transcript = get_transcript(video_id)
|
@@ -571,7 +572,6 @@ def process_youtube_link(password, link):
|
|
571 |
VIDEO_ID = video_id
|
572 |
|
573 |
try:
|
574 |
-
# transcript = process_transcript_and_screenshots(video_id)
|
575 |
transcript = process_transcript_and_screenshots_on_gcs(video_id)
|
576 |
except Exception as e:
|
577 |
error_msg = f" {video_id} 逐字稿錯誤: {str(e)}"
|
@@ -579,17 +579,14 @@ def process_youtube_link(password, link):
|
|
579 |
print(error_msg)
|
580 |
raise gr.Error(error_msg)
|
581 |
|
|
|
582 |
formatted_transcript = []
|
583 |
formatted_simple_transcript =[]
|
584 |
-
screenshot_paths = []
|
585 |
for entry in transcript:
|
586 |
start_time = format_seconds_to_time(entry['start'])
|
587 |
end_time = format_seconds_to_time(entry['start'] + entry['duration'])
|
588 |
embed_url = get_embedded_youtube_link(video_id, entry['start'])
|
589 |
img_file_id = entry['img_file_id']
|
590 |
-
# img_file_id =""
|
591 |
-
# 先取消 Google Drive 的图片
|
592 |
-
# screenshot_path = f"https://lh3.googleusercontent.com/d/{img_file_id}=s4000"
|
593 |
screenshot_path = img_file_id
|
594 |
line = {
|
595 |
"start_time": start_time,
|
@@ -606,7 +603,6 @@ def process_youtube_link(password, link):
|
|
606 |
"text": entry['text']
|
607 |
}
|
608 |
formatted_simple_transcript.append(simple_line)
|
609 |
-
screenshot_paths.append(screenshot_path)
|
610 |
|
611 |
global TRANSCRIPTS
|
612 |
TRANSCRIPTS = formatted_transcript
|
@@ -639,7 +635,7 @@ def process_youtube_link(password, link):
|
|
639 |
questions[0] if len(questions) > 0 else "", \
|
640 |
questions[1] if len(questions) > 1 else "", \
|
641 |
questions[2] if len(questions) > 2 else "", \
|
642 |
-
|
643 |
summary, \
|
644 |
key_moments_html, \
|
645 |
mind_map, \
|
@@ -1380,11 +1376,11 @@ def delete_LLM_content(video_id, kind):
|
|
1380 |
bucket_name = 'video_ai_assistant'
|
1381 |
file_name = f'{video_id}_{kind}.json'
|
1382 |
blob_name = f"{video_id}/{file_name}"
|
1383 |
-
# 检查
|
1384 |
is_file_exists = GCS_SERVICE.check_file_exists(bucket_name, blob_name)
|
1385 |
if is_file_exists:
|
1386 |
delete_blob(gcs_client, bucket_name, blob_name)
|
1387 |
-
print("
|
1388 |
return gr.update(value="", interactive=False)
|
1389 |
|
1390 |
def update_LLM_content(video_id, new_content, kind):
|
@@ -1398,28 +1394,47 @@ def update_LLM_content(video_id, new_content, kind):
|
|
1398 |
reading_passage_json = {"reading_passage": str(new_content)}
|
1399 |
reading_passage_text = json.dumps(reading_passage_json, ensure_ascii=False, indent=2)
|
1400 |
upload_file_to_gcs_with_json_string(gcs_client, bucket_name, blob_name, reading_passage_text)
|
|
|
1401 |
elif kind == "summary":
|
1402 |
summary_json = {"summary": str(new_content)}
|
1403 |
summary_text = json.dumps(summary_json, ensure_ascii=False, indent=2)
|
1404 |
upload_file_to_gcs_with_json_string(gcs_client, bucket_name, blob_name, summary_text)
|
|
|
1405 |
elif kind == "mind_map":
|
1406 |
mind_map_json = {"mind_map": str(new_content)}
|
1407 |
mind_map_text = json.dumps(mind_map_json, ensure_ascii=False, indent=2)
|
1408 |
upload_file_to_gcs_with_json_string(gcs_client, bucket_name, blob_name, mind_map_text)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1409 |
|
1410 |
print(f"{kind} 已更新到GCS")
|
1411 |
-
return gr.update(value=
|
1412 |
|
1413 |
def create_LLM_content(video_id, df_string, kind):
|
1414 |
print(f"===create_{kind}===")
|
|
|
|
|
1415 |
if kind == "reading_passage":
|
1416 |
content = generate_reading_passage(df_string)
|
|
|
1417 |
elif kind == "summary":
|
1418 |
content = generate_summarise(df_string)
|
|
|
1419 |
elif kind == "mind_map":
|
1420 |
content = generate_mind_map(df_string)
|
|
|
|
|
|
|
|
|
|
|
1421 |
|
1422 |
-
update_LLM_content(video_id, content, kind)
|
1423 |
return gr.update(value=content, interactive=False)
|
1424 |
|
1425 |
|
@@ -2016,7 +2031,7 @@ with gr.Blocks(theme=gr.themes.Base(primary_hue=gr.themes.colors.orange, seconda
|
|
2016 |
with gr.Row() as admin:
|
2017 |
password = gr.Textbox(label="Password", type="password", elem_id="password_input", visible=True)
|
2018 |
youtube_link = gr.Textbox(label="Enter YouTube Link", elem_id="youtube_link_input", visible=True)
|
2019 |
-
video_id = gr.Textbox(label="video_id", visible=
|
2020 |
# file_upload = gr.File(label="Upload your CSV or Word file", visible=False)
|
2021 |
# web_link = gr.Textbox(label="Enter Web Page Link", visible=False)
|
2022 |
user_data = gr.Textbox(label="User Data", elem_id="user_data_input", visible=True)
|
@@ -2183,7 +2198,14 @@ with gr.Blocks(theme=gr.themes.Base(primary_hue=gr.themes.colors.orange, seconda
|
|
2183 |
|
2184 |
with gr.Accordion("See Details", open=False) as see_details:
|
2185 |
with gr.Tab("本文"):
|
2186 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2187 |
with gr.Tab("逐字稿"):
|
2188 |
simple_html_content = gr.HTML(label="Simple Transcript")
|
2189 |
with gr.Tab("圖文"):
|
@@ -2374,6 +2396,28 @@ with gr.Blocks(theme=gr.themes.Base(primary_hue=gr.themes.colors.orange, seconda
|
|
2374 |
outputs=[df_summarise]
|
2375 |
)
|
2376 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2377 |
# 教師版
|
2378 |
worksheet_content_btn.click(
|
2379 |
get_ai_content,
|
|
|
492 |
is_new_transcript = False
|
493 |
is_transcript_exists = GCS_SERVICE.check_file_exists(bucket_name, transcript_blob_name)
|
494 |
if not is_transcript_exists:
|
495 |
+
print("逐字稿文件不存在于GCS中,重新建立")
|
496 |
# 从YouTube获取逐字稿并上传
|
497 |
try:
|
498 |
transcript = get_transcript(video_id)
|
|
|
572 |
VIDEO_ID = video_id
|
573 |
|
574 |
try:
|
|
|
575 |
transcript = process_transcript_and_screenshots_on_gcs(video_id)
|
576 |
except Exception as e:
|
577 |
error_msg = f" {video_id} 逐字稿錯誤: {str(e)}"
|
|
|
579 |
print(error_msg)
|
580 |
raise gr.Error(error_msg)
|
581 |
|
582 |
+
original_transcript = json.dumps(transcript, ensure_ascii=False, indent=2)
|
583 |
formatted_transcript = []
|
584 |
formatted_simple_transcript =[]
|
|
|
585 |
for entry in transcript:
|
586 |
start_time = format_seconds_to_time(entry['start'])
|
587 |
end_time = format_seconds_to_time(entry['start'] + entry['duration'])
|
588 |
embed_url = get_embedded_youtube_link(video_id, entry['start'])
|
589 |
img_file_id = entry['img_file_id']
|
|
|
|
|
|
|
590 |
screenshot_path = img_file_id
|
591 |
line = {
|
592 |
"start_time": start_time,
|
|
|
603 |
"text": entry['text']
|
604 |
}
|
605 |
formatted_simple_transcript.append(simple_line)
|
|
|
606 |
|
607 |
global TRANSCRIPTS
|
608 |
TRANSCRIPTS = formatted_transcript
|
|
|
635 |
questions[0] if len(questions) > 0 else "", \
|
636 |
questions[1] if len(questions) > 1 else "", \
|
637 |
questions[2] if len(questions) > 2 else "", \
|
638 |
+
original_transcript, \
|
639 |
summary, \
|
640 |
key_moments_html, \
|
641 |
mind_map, \
|
|
|
1376 |
bucket_name = 'video_ai_assistant'
|
1377 |
file_name = f'{video_id}_{kind}.json'
|
1378 |
blob_name = f"{video_id}/{file_name}"
|
1379 |
+
# 检查 file 是否存在
|
1380 |
is_file_exists = GCS_SERVICE.check_file_exists(bucket_name, blob_name)
|
1381 |
if is_file_exists:
|
1382 |
delete_blob(gcs_client, bucket_name, blob_name)
|
1383 |
+
print(f"{file_name}已从GCS中删除")
|
1384 |
return gr.update(value="", interactive=False)
|
1385 |
|
1386 |
def update_LLM_content(video_id, new_content, kind):
|
|
|
1394 |
reading_passage_json = {"reading_passage": str(new_content)}
|
1395 |
reading_passage_text = json.dumps(reading_passage_json, ensure_ascii=False, indent=2)
|
1396 |
upload_file_to_gcs_with_json_string(gcs_client, bucket_name, blob_name, reading_passage_text)
|
1397 |
+
updated_content = reading_passage_text
|
1398 |
elif kind == "summary":
|
1399 |
summary_json = {"summary": str(new_content)}
|
1400 |
summary_text = json.dumps(summary_json, ensure_ascii=False, indent=2)
|
1401 |
upload_file_to_gcs_with_json_string(gcs_client, bucket_name, blob_name, summary_text)
|
1402 |
+
updated_content = summary_text
|
1403 |
elif kind == "mind_map":
|
1404 |
mind_map_json = {"mind_map": str(new_content)}
|
1405 |
mind_map_text = json.dumps(mind_map_json, ensure_ascii=False, indent=2)
|
1406 |
upload_file_to_gcs_with_json_string(gcs_client, bucket_name, blob_name, mind_map_text)
|
1407 |
+
updated_content = mind_map_text
|
1408 |
+
elif kind == "transcript":
|
1409 |
+
if isinstance(new_content, str):
|
1410 |
+
transcript_json = json.loads(new_content)
|
1411 |
+
else:
|
1412 |
+
transcript_json = new_content
|
1413 |
+
transcript_text = json.dumps(transcript_json, ensure_ascii=False, indent=2)
|
1414 |
+
upload_file_to_gcs_with_json_string(gcs_client, bucket_name, blob_name, transcript_text)
|
1415 |
+
updated_content = transcript_text
|
1416 |
|
1417 |
print(f"{kind} 已更新到GCS")
|
1418 |
+
return gr.update(value=updated_content, interactive=False)
|
1419 |
|
1420 |
def create_LLM_content(video_id, df_string, kind):
|
1421 |
print(f"===create_{kind}===")
|
1422 |
+
print(f"video_id: {video_id}")
|
1423 |
+
|
1424 |
if kind == "reading_passage":
|
1425 |
content = generate_reading_passage(df_string)
|
1426 |
+
update_LLM_content(video_id, content, kind)
|
1427 |
elif kind == "summary":
|
1428 |
content = generate_summarise(df_string)
|
1429 |
+
update_LLM_content(video_id, content, kind)
|
1430 |
elif kind == "mind_map":
|
1431 |
content = generate_mind_map(df_string)
|
1432 |
+
update_LLM_content(video_id, content, kind)
|
1433 |
+
elif kind == "transcript":
|
1434 |
+
content = process_transcript_and_screenshots_on_gcs(video_id)
|
1435 |
+
update_LLM_content(video_id, content, kind)
|
1436 |
+
content = json.dumps(content, ensure_ascii=False, indent=2)
|
1437 |
|
|
|
1438 |
return gr.update(value=content, interactive=False)
|
1439 |
|
1440 |
|
|
|
2031 |
with gr.Row() as admin:
|
2032 |
password = gr.Textbox(label="Password", type="password", elem_id="password_input", visible=True)
|
2033 |
youtube_link = gr.Textbox(label="Enter YouTube Link", elem_id="youtube_link_input", visible=True)
|
2034 |
+
video_id = gr.Textbox(label="video_id", visible=True)
|
2035 |
# file_upload = gr.File(label="Upload your CSV or Word file", visible=False)
|
2036 |
# web_link = gr.Textbox(label="Enter Web Page Link", visible=False)
|
2037 |
user_data = gr.Textbox(label="User Data", elem_id="user_data_input", visible=True)
|
|
|
2198 |
|
2199 |
with gr.Accordion("See Details", open=False) as see_details:
|
2200 |
with gr.Tab("本文"):
|
2201 |
+
with gr.Row() as transcript_admmin:
|
2202 |
+
transcript_kind = gr.Textbox(value="transcript", show_label=False)
|
2203 |
+
transcript_edit_button = gr.Button("編輯", size="sm", variant="primary")
|
2204 |
+
transcript_update_button = gr.Button("更新", size="sm", variant="primary")
|
2205 |
+
transcript_delete_button = gr.Button("刪除", size="sm", variant="primary")
|
2206 |
+
transcript_create_button = gr.Button("建立", size="sm", variant="primary")
|
2207 |
+
with gr.Row():
|
2208 |
+
df_string_output = gr.Textbox(lines=40, label="Data Text", )
|
2209 |
with gr.Tab("逐字稿"):
|
2210 |
simple_html_content = gr.HTML(label="Simple Transcript")
|
2211 |
with gr.Tab("圖文"):
|
|
|
2396 |
outputs=[df_summarise]
|
2397 |
)
|
2398 |
|
2399 |
+
# transcript event
|
2400 |
+
transcript_create_button.click(
|
2401 |
+
create_LLM_content,
|
2402 |
+
inputs=[video_id, df_string_output, transcript_kind],
|
2403 |
+
outputs=[df_string_output]
|
2404 |
+
)
|
2405 |
+
transcript_delete_button.click(
|
2406 |
+
delete_LLM_content,
|
2407 |
+
inputs=[video_id, transcript_kind],
|
2408 |
+
outputs=[df_string_output]
|
2409 |
+
)
|
2410 |
+
transcript_edit_button.click(
|
2411 |
+
enable_edit_mode,
|
2412 |
+
inputs=[],
|
2413 |
+
outputs=[df_string_output]
|
2414 |
+
)
|
2415 |
+
transcript_update_button.click(
|
2416 |
+
update_LLM_content,
|
2417 |
+
inputs=[video_id, df_string_output, transcript_kind],
|
2418 |
+
outputs=[df_string_output]
|
2419 |
+
)
|
2420 |
+
|
2421 |
# 教師版
|
2422 |
worksheet_content_btn.click(
|
2423 |
get_ai_content,
|