Spaces:

JunyiAcademy
/

vaitor2

Running

youngtsai committed on Feb 7, 2024

Commit

ef2b19a

1 Parent(s): 1b3ea4e

formatted_simple_transcript.append(simple_line)

Files changed (1) hide show

app.py CHANGED Viewed

@@ -282,6 +282,7 @@ def process_youtube_link(link):
     transcript = process_transcript_and_screenshots(video_id)
     formatted_transcript = []
     screenshot_paths = []
     for entry in transcript:
         start_time = format_seconds_to_time(entry['start'])
@@ -297,6 +298,13 @@ def process_youtube_link(link):
             "screenshot_path": screenshot_path
         }
         formatted_transcript.append(line)
         screenshot_paths.append(screenshot_path)
     html_content = format_transcript_to_html(formatted_transcript)
@@ -305,10 +313,9 @@ def process_youtube_link(link):
     print("=====html_content=====")
     # 基于逐字稿生成其他所需的输出
-    questions = generate_questions(formatted_transcript)
-    # 将 DataFrame 转换为纯文本，並分行
     df_string_output = json.dumps(formatted_transcript, ensure_ascii=False, indent=2)
-    df_summarise = generate_df_summarise(formatted_transcript)
     global TRANSCRIPTS
     TRANSCRIPTS = formatted_transcript

     transcript = process_transcript_and_screenshots(video_id)
     formatted_transcript = []
+    formatted_simple_transcript =[]
     screenshot_paths = []
     for entry in transcript:
         start_time = format_seconds_to_time(entry['start'])
             "screenshot_path": screenshot_path
         }
         formatted_transcript.append(line)
+        # formatted_simple_transcript 只要 start_time, end_time, text
+        simple_line = {
+            "start_time": start_time,
+            "end_time": end_time,
+            "text": entry['text']
+        }
+        formatted_simple_transcript.append(simple_line)
         screenshot_paths.append(screenshot_path)
     html_content = format_transcript_to_html(formatted_transcript)
     print("=====html_content=====")
     # 基于逐字稿生成其他所需的输出
+    questions = generate_questions(formatted_simple_transcript)
     df_string_output = json.dumps(formatted_transcript, ensure_ascii=False, indent=2)
+    df_summarise = generate_df_summarise(formatted_simple_transcript)
     global TRANSCRIPTS
     TRANSCRIPTS = formatted_transcript