Spaces:
Running
Running
video_length = get_video_duration(video_id)
Browse files
app.py
CHANGED
@@ -369,6 +369,10 @@ def generate_transcription_by_whisper(video_id):
|
|
369 |
|
370 |
return transcription
|
371 |
|
|
|
|
|
|
|
|
|
372 |
def process_transcript_and_screenshots_on_gcs(video_id):
|
373 |
print("====process_transcript_and_screenshots_on_gcs====")
|
374 |
# GCS
|
@@ -396,6 +400,11 @@ def process_transcript_and_screenshots_on_gcs(video_id):
|
|
396 |
print("沒有找到字幕")
|
397 |
transcript = generate_transcription_by_whisper(video_id)
|
398 |
|
|
|
|
|
|
|
|
|
|
|
399 |
transcript_text = json.dumps(transcript, ensure_ascii=False, indent=2)
|
400 |
GCS_SERVICE.upload_json_string(bucket_name, transcript_blob_name, transcript_text)
|
401 |
|
|
|
369 |
|
370 |
return transcription
|
371 |
|
372 |
+
def get_video_duration(video_id):
    """Return the ``length`` reported by the ``YouTube`` object for a video.

    Args:
        video_id: YouTube video identifier; it is interpolated into the
            standard ``watch?v=`` URL.

    Returns:
        Whatever ``YouTube(...).length`` yields for that URL.
        NOTE(review): presumably the duration in seconds, since the caller
        compares it with transcript ``start`` values -- confirm against the
        library documentation.
    """
    watch_url = f'https://www.youtube.com/watch?v={video_id}'
    return YouTube(watch_url).length
|
375 |
+
|
376 |
def process_transcript_and_screenshots_on_gcs(video_id):
|
377 |
print("====process_transcript_and_screenshots_on_gcs====")
|
378 |
# GCS
|
|
|
400 |
print("沒有找到字幕")
|
401 |
transcript = generate_transcription_by_whisper(video_id)
|
402 |
|
403 |
+
# Drop transcript entries whose start lies beyond the end of the video.
# NOTE(review): assumes entry['start'] and video_length use the same unit -- confirm.
video_length = get_video_duration(video_id)
# Build the filtered list in one pass instead of calling remove() while
# iterating: removing during iteration makes the loop skip the element that
# follows each removal, so consecutive out-of-range entries would survive.
# Slice assignment keeps the same list object, as the original remove() did.
transcript[:] = [entry for entry in transcript if entry['start'] <= video_length]
|
407 |
+
|
408 |
transcript_text = json.dumps(transcript, ensure_ascii=False, indent=2)
|
409 |
GCS_SERVICE.upload_json_string(bucket_name, transcript_blob_name, transcript_text)
|
410 |
|