Spaces:

JunyiAcademy
/

vaitor2

Running

youngtsai committed on Apr 22, 2024

Commit

e27706d

1 Parent(s): 5e224d7

video_length = get_video_duration(video_id)

Files changed (1) hide show

app.py CHANGED Viewed

@@ -369,6 +369,10 @@ def generate_transcription_by_whisper(video_id):
     return transcription
 def process_transcript_and_screenshots_on_gcs(video_id):
     print("====process_transcript_and_screenshots_on_gcs====")
     # GCS
@@ -396,6 +400,11 @@ def process_transcript_and_screenshots_on_gcs(video_id):
             print("沒有找到字幕")
             transcript = generate_transcription_by_whisper(video_id)
         transcript_text = json.dumps(transcript, ensure_ascii=False, indent=2)
         GCS_SERVICE.upload_json_string(bucket_name, transcript_blob_name, transcript_text)

     return transcription
+def get_video_duration(video_id):
+    """Return the ``length`` attribute of the YouTube video with id *video_id*.
+
+    NOTE(review): ``YouTube`` is presumably pytube's class, whose ``length``
+    is the duration in seconds -- confirm against the file's imports.
+    """
+    watch_url = f'https://www.youtube.com/watch?v={video_id}'
+    video = YouTube(watch_url)
+    return video.length
 def process_transcript_and_screenshots_on_gcs(video_id):
     print("====process_transcript_and_screenshots_on_gcs====")
     # GCS
             print("沒有找到字幕")
             transcript = generate_transcription_by_whisper(video_id)
+        video_length = get_video_duration(video_id)
+        # Drop transcript entries whose start time lies past the end of the
+        # video. Filter in a single pass instead of calling remove() inside the
+        # loop: removing from a list while iterating over it makes the iterator
+        # skip the element that follows each removed one, so runs of
+        # consecutive out-of-range entries were only partially removed.
+        # Slice assignment keeps updating the same list object in place.
+        transcript[:] = [
+            entry for entry in transcript if entry['start'] <= video_length
+        ]
         transcript_text = json.dumps(transcript, ensure_ascii=False, indent=2)
         GCS_SERVICE.upload_json_string(bucket_name, transcript_blob_name, transcript_text)