youngtsai committed
Commit 7eb060e · 1 Parent(s): e3630bd

with open(chunk_path, "rb") as chunk_file:

Files changed (1)
  1. app.py +24 -61
app.py CHANGED
@@ -373,6 +373,9 @@ def get_transcript(video_id):
     for language in languages:
         try:
             transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=[language])
+            print("===transcript===")
+            print(transcript)
+            print("===transcript===")
             return transcript  # Transcript fetched successfully; return it directly
         except NoTranscriptFound:
             continue  # No transcript for the current language; try the next one
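
The three added print calls simply dump the raw transcript for debugging. For context, here is a minimal sketch of the language-fallback loop this hunk touches, assuming the youtube_transcript_api package the app already uses and an illustrative language-preference tuple rather than the app's real `languages` value; `get_transcript_sketch` is a hypothetical name, not the app's function:

from youtube_transcript_api import YouTubeTranscriptApi, NoTranscriptFound

def get_transcript_sketch(video_id, languages=("zh-TW", "zh-Hant", "en")):
    # Try each preferred language in order and return the first transcript found.
    for language in languages:
        try:
            transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=[language])
            print("===transcript===")
            print(transcript)  # a list of {'text', 'start', 'duration'} dicts
            print("===transcript===")
            return transcript
        except NoTranscriptFound:
            continue  # no captions in this language; try the next one
    return None  # nothing found in any preferred language
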
@@ -413,73 +416,33 @@ def generate_transcription(video_id):
         chunk_path = f"{OUTPUT_PATH}/{video_id}_part_{i}.{codec_name}"
         chunk.export(chunk_path, format=codec_name)
 
-        with open(chunk_path, "rb") as chunk_file:
-            response = OPEN_AI_CLIENT.audio.transcriptions.create(
-                model="whisper-1",
-                file=chunk_file,
-                response_format="verbose_json",
-                timestamp_granularities=["segment"],
-                prompt="Transcribe the following audio file. if chinese, please using 'language: zh-TW' ",
-            )
-
-        # Adjusting the timestamps for the chunk based on its position in the full audio
-        adjusted_segments = [{
-            'text': segment['text'],
-            'start': math.ceil(segment['start'] + start_time / 1000.0),  # Converting milliseconds to seconds
-            'end': math.ceil(segment['end'] + start_time / 1000.0),
-            'duration': math.ceil(segment['end'] - segment['start'])
-        } for segment in response.segments]
-
-        transcription.extend(adjusted_segments)
+        try:
+            with open(chunk_path, "rb") as chunk_file:
+                response = OPEN_AI_CLIENT.audio.transcriptions.create(
+                    model="whisper-1",
+                    file=chunk_file,
+                    response_format="verbose_json",
+                    timestamp_granularities=["segment"],
+                    prompt="Transcribe the following audio file. if chinese, please using 'language: zh-TW' ",
+                )
+
+            # Adjusting the timestamps for the chunk based on its position in the full audio
+            adjusted_segments = [{
+                'text': segment['text'],
+                'start': math.ceil(segment['start'] + start_time / 1000.0),  # Converting milliseconds to seconds
+                'end': math.ceil(segment['end'] + start_time / 1000.0),
+                'duration': math.ceil(segment['end'] - segment['start'])
+            } for segment in response.segments]
+
+            transcription.extend(adjusted_segments)
+        except Exception as e:
+            print(f"Error processing chunk {i}: {str(e)}")
 
         # Remove temporary chunk files after processing
         os.remove(chunk_path)
 
     return transcription
 
-def process_transcript_and_screenshots(video_id):
-    print("====process_transcript_and_screenshots====")
-
-    # Drive
-    service = init_drive_service()
-    parent_folder_id = '1GgI4YVs0KckwStVQkLa1NZ8IpaEMurkL'
-    folder_id = create_folder_if_not_exists(service, video_id, parent_folder_id)
-
-    # Transcript file name
-    file_name = f'{video_id}_transcript.json'
-    # Check whether the transcript already exists
-    exists, file_id = check_file_exists(service, folder_id, file_name)
-    if not exists:
-        # Fetch the transcript from YouTube and upload it
-        transcript = get_transcript(video_id)
-        if transcript:
-            print("成功獲取字幕")
-        else:
-            print("沒有找到字幕")
-        transcript_text = json.dumps(transcript, ensure_ascii=False, indent=2)
-        file_id = upload_content_directly(service, file_name, folder_id, transcript_text)
-        print("逐字稿已上传到Google Drive")
-    else:
-        # The transcript already exists; download its contents
-        print("逐字稿已存在于Google Drive中")
-        transcript_text = download_file_as_string(service, file_id)
-        transcript = json.loads(transcript_text)
-
-    # Process each transcript entry: check for and upload screenshots
-    for entry in transcript:
-        if 'img_file_id' not in entry:
-            screenshot_path = screenshot_youtube_video(video_id, entry['start'])
-            img_file_id = upload_img_directly(service, f"{video_id}_{entry['start']}.jpg", folder_id, screenshot_path)
-            set_public_permission(service, img_file_id)
-            entry['img_file_id'] = img_file_id
-            print(f"截图已上传到Google Drive: {img_file_id}")
-
-    # Update the transcript file
-    updated_transcript_text = json.dumps(transcript, ensure_ascii=False, indent=2)
-    update_file_on_drive(service, file_id, updated_transcript_text)
-    print("逐字稿已更新,包括截图链接")
-    return transcript
-
 def process_transcript_and_screenshots_on_gcs(video_id):
     print("====process_transcript_and_screenshots_on_gcs====")
     # GCS
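
The second hunk does two things: it wraps the per-chunk Whisper call in try/except so a failed chunk is logged instead of aborting the whole transcription, and it keeps the existing timestamp adjustment that shifts each chunk's local segment times by the chunk's offset in the full audio. A minimal sketch of that offset arithmetic, using a hypothetical adjust_segments helper and plain dicts in place of the real response.segments objects:

import math

def adjust_segments(segments, start_time_ms):
    # Shift chunk-local timestamps (seconds) by the chunk's offset in the full audio.
    offset_s = start_time_ms / 1000.0  # milliseconds -> seconds
    return [{
        'text': seg['text'],
        'start': math.ceil(seg['start'] + offset_s),
        'end': math.ceil(seg['end'] + offset_s),
        'duration': math.ceil(seg['end'] - seg['start']),
    } for seg in segments]

# Example: a segment spanning 3.2-5.7 s inside a chunk that begins 60,000 ms into the audio
print(adjust_segments([{'text': 'hi', 'start': 3.2, 'end': 5.7}], 60_000))
# -> [{'text': 'hi', 'start': 64, 'end': 66, 'duration': 3}]

Note that math.ceil rounds every boundary up to whole seconds, so a 2.5 s segment reports a duration of 3 s.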
 