Spaces:

JunyiAcademy
/

vaitor2

Running

App Files Community

youngtsai committed on Jan 26, 2024

Commit

6a6dfe0

1 Parent(s): 3c4e755

html_content += f"<img src='{entry['screenshot_path']}' width='500px' />"

Browse files

Files changed (1) hide show

app.py +10 -2

app.py CHANGED Viewed

@@ -66,6 +66,9 @@ def process_youtube_link(link):
     # 使用 YouTube API 获取逐字稿
     # 假设您已经获取了 YouTube 视频的逐字稿并存储在变量 `transcript` 中
     video_id = link.split("=")[-1]
     transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=['zh-TW'])
     # 基于逐字稿生成其他所需的输出
     questions = generate_questions(transcript)
@@ -76,14 +79,19 @@ def process_youtube_link(link):
         start_time = format_seconds_to_time(entry['start'])
         end_time = format_seconds_to_time(entry['start'] + entry['duration'])
         embed_url = get_embedded_youtube_link(video_id, entry['start'])
         line = {
             "start_time": start_time,
             "end_time": end_time,
             "text": entry['text'],
             "embed_url": embed_url,
-            "time_sec": entry['start']
         }
         formatted_transcript.append(line)
     html_content = format_transcript_to_html(formatted_transcript)
     print("=====html_content=====")
@@ -102,7 +110,7 @@ def format_transcript_to_html(formatted_transcript):
     for entry in formatted_transcript:
         html_content += f"<h3>{entry['start_time']} - {entry['end_time']}</h3>"
         html_content += f"<p>{entry['text']}</p>"
-        html_content += f"<p>{entry['time_sec']} </p>  <br><br>"
     return html_content
 def get_embedded_youtube_link(video_id, start_time):

     # 使用 YouTube API 获取逐字稿
     # 假设您已经获取了 YouTube 视频的逐字稿并存储在变量 `transcript` 中
     video_id = link.split("=")[-1]
+    # 先下載 video
+    download_youtube_video(video_id, output_path=OUTPUT_PATH)
+    # 再取得 transcript
     transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=['zh-TW'])
     # 基于逐字稿生成其他所需的输出
     questions = generate_questions(transcript)
         start_time = format_seconds_to_time(entry['start'])
         end_time = format_seconds_to_time(entry['start'] + entry['duration'])
         embed_url = get_embedded_youtube_link(video_id, entry['start'])
+        # 截圖
+        screenshot_path = screenshot_youtube_video(video_id, entry['start'])
         line = {
             "start_time": start_time,
             "end_time": end_time,
             "text": entry['text'],
             "embed_url": embed_url,
+            "time_sec": entry['start'],
+            "screenshot_path": screenshot_path
         }
         formatted_transcript.append(line)
     html_content = format_transcript_to_html(formatted_transcript)
     print("=====html_content=====")
     for entry in formatted_transcript:
         html_content += f"<h3>{entry['start_time']} - {entry['end_time']}</h3>"
         html_content += f"<p>{entry['text']}</p>"
+        html_content += f"<img src='{entry['screenshot_path']}' width='500px' />"
     return html_content
 def get_embedded_youtube_link(video_id, start_time):