youngtsai committed on
Commit
e27706d
·
1 Parent(s): 5e224d7

video_length = get_video_duration(video_id)

Browse files
Files changed (1) hide show
  1. app.py +9 -0
app.py CHANGED
@@ -369,6 +369,10 @@ def generate_transcription_by_whisper(video_id):
369
 
370
  return transcription
371
 
 
 
 
 
372
  def process_transcript_and_screenshots_on_gcs(video_id):
373
  print("====process_transcript_and_screenshots_on_gcs====")
374
  # GCS
@@ -396,6 +400,11 @@ def process_transcript_and_screenshots_on_gcs(video_id):
396
  print("沒有找到字幕")
397
  transcript = generate_transcription_by_whisper(video_id)
398
 
 
 
 
 
 
399
  transcript_text = json.dumps(transcript, ensure_ascii=False, indent=2)
400
  GCS_SERVICE.upload_json_string(bucket_name, transcript_blob_name, transcript_text)
401
 
 
369
 
370
  return transcription
371
 
372
def get_video_duration(video_id):
    """Return the ``length`` attribute of the YouTube video with this ID.

    Builds the watch URL from ``video_id``, wraps it in a ``YouTube``
    object and returns that object's ``length``.

    NOTE(review): ``length`` is presumably the duration in seconds, since
    the caller compares it against transcript ``start`` values -- confirm
    against the library that provides ``YouTube``.
    """
    return YouTube(f'https://www.youtube.com/watch?v={video_id}').length
375
+
376
  def process_transcript_and_screenshots_on_gcs(video_id):
377
  print("====process_transcript_and_screenshots_on_gcs====")
378
  # GCS
 
400
  print("沒有找到字幕")
401
  transcript = generate_transcription_by_whisper(video_id)
402
 
403
# Drop transcript entries whose start time lies beyond the end of the video.
video_length = get_video_duration(video_id)
# Filter in a single pass instead of calling transcript.remove() inside a
# loop over the same list: removing during iteration makes the iterator
# skip the element after each removal, so consecutive out-of-range entries
# would be left behind. Slice assignment keeps the same list object, so
# the in-place mutation is preserved for any other reference to it.
transcript[:] = [
    entry for entry in transcript if not entry['start'] > video_length
]
407
+
408
  transcript_text = json.dumps(transcript, ensure_ascii=False, indent=2)
409
  GCS_SERVICE.upload_json_string(bucket_name, transcript_blob_name, transcript_text)
410