Spaces:
Running
Running
formatted_simple_transcript.append(simple_line)
Browse files
app.py
CHANGED
@@ -282,6 +282,7 @@ def process_youtube_link(link):
|
|
282 |
transcript = process_transcript_and_screenshots(video_id)
|
283 |
|
284 |
formatted_transcript = []
|
|
|
285 |
screenshot_paths = []
|
286 |
for entry in transcript:
|
287 |
start_time = format_seconds_to_time(entry['start'])
|
@@ -297,6 +298,13 @@ def process_youtube_link(link):
|
|
297 |
"screenshot_path": screenshot_path
|
298 |
}
|
299 |
formatted_transcript.append(line)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
300 |
screenshot_paths.append(screenshot_path)
|
301 |
|
302 |
html_content = format_transcript_to_html(formatted_transcript)
|
@@ -305,10 +313,9 @@ def process_youtube_link(link):
|
|
305 |
print("=====html_content=====")
|
306 |
|
307 |
# 基于逐字稿生成其他所需的输出
|
308 |
-
questions = generate_questions(
|
309 |
-
# 将 DataFrame 转换为纯文本,並分行
|
310 |
df_string_output = json.dumps(formatted_transcript, ensure_ascii=False, indent=2)
|
311 |
-
df_summarise = generate_df_summarise(
|
312 |
|
313 |
global TRANSCRIPTS
|
314 |
TRANSCRIPTS = formatted_transcript
|
|
|
282 |
transcript = process_transcript_and_screenshots(video_id)
|
283 |
|
284 |
formatted_transcript = []
|
285 |
+
formatted_simple_transcript =[]
|
286 |
screenshot_paths = []
|
287 |
for entry in transcript:
|
288 |
start_time = format_seconds_to_time(entry['start'])
|
|
|
298 |
"screenshot_path": screenshot_path
|
299 |
}
|
300 |
formatted_transcript.append(line)
|
301 |
+
# formatted_simple_transcript 只要 start_time, end_time, text
|
302 |
+
simple_line = {
|
303 |
+
"start_time": start_time,
|
304 |
+
"end_time": end_time,
|
305 |
+
"text": entry['text']
|
306 |
+
}
|
307 |
+
formatted_simple_transcript.append(simple_line)
|
308 |
screenshot_paths.append(screenshot_path)
|
309 |
|
310 |
html_content = format_transcript_to_html(formatted_transcript)
|
|
|
313 |
print("=====html_content=====")
|
314 |
|
315 |
# 基于逐字稿生成其他所需的输出
|
316 |
+
questions = generate_questions(formatted_simple_transcript)
|
|
|
317 |
df_string_output = json.dumps(formatted_transcript, ensure_ascii=False, indent=2)
|
318 |
+
df_summarise = generate_df_summarise(formatted_simple_transcript)
|
319 |
|
320 |
global TRANSCRIPTS
|
321 |
TRANSCRIPTS = formatted_transcript
|