Spaces:
Running
Running
File size: 4,894 Bytes
a5e1063 19eddee a5e1063 df32805 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 |
from faster_whisper import WhisperModel
import math
def word_level_transcribe(audio, max_segment_duration=2.0): # Set your desired max duration here
model = WhisperModel("tiny", device="cpu")
segments, info = model.transcribe(audio, vad_filter=True, vad_parameters=dict(min_silence_duration_ms=1500), word_timestamps=True, log_progress=True)
segments = list(segments) # The transcription will actually run here.
wordlevel_info = []
for segment in segments:
for word in segment.words:
print("[%.2fs -> %.2fs] %s" % (word.start, word.end, word.word))
wordlevel_info.append({'word':word.word,'start':word.start,'end':word.end})
return wordlevel_info
def create_subtitles(wordlevel_info):
punctuation_marks = {'.', '!', '?', ',', ';', ':', '—', '-', '。', '!', '?'} # Add/remove punctuation as needed
subtitles = []
line = []
for word_data in wordlevel_info:
line.append(word_data)
current_word = word_data['word']
# Check if current word ends with punctuation or line reached 5 words
ends_with_punct = current_word and (current_word[-1] in punctuation_marks)
if ends_with_punct or len(line) == 5:
# Create a new subtitle segment
subtitle = {
"word": " ".join(item["word"] for item in line),
"start": line[0]["start"],
"end": line[-1]["end"],
"textcontents": line.copy()
}
subtitles.append(subtitle)
line = []
# Add remaining words if any
if line:
subtitle = {
"word": " ".join(item["word"] for item in line),
"start": line[0]["start"],
"end": line[-1]["end"],
"textcontents": line.copy()
}
subtitles.append(subtitle)
# Remove gaps between segments by extending the previous segment's end time
for i in range(1, len(subtitles)):
prev_subtitle = subtitles[i - 1]
current_subtitle = subtitles[i]
# Extend the previous segment's end time to the start of the current segment
prev_subtitle["end"] = current_subtitle["start"]
return subtitles
def format_time(seconds):
hours = math.floor(seconds / 3600)
seconds %= 3600
minutes = math.floor(seconds / 60)
seconds %= 60
milliseconds = round((seconds - math.floor(seconds)) * 1000)
seconds = math.floor(seconds)
formatted_time = f"{hours:02d}:{minutes:02d}:{seconds:01d},{milliseconds:03d}"
return formatted_time
def generate_subtitle_file(language, segments, input_video_name):
subtitle_file = f"sub-{input_video_name}.{language}.srt"
text = ""
for index, segment in enumerate(segments):
segment_start = format_time(segment['start'])
segment_end = format_time(segment['end'])
text += f"{str(index+1)} \n"
text += f"{segment_start} --> {segment_end} \n"
text += f"{segment['word']} \n"
text += "\n"
f = open(subtitle_file, "w", encoding='utf8')
f.write(text)
f.close()
return subtitle_file
def split_srt_file(input_file, max_chars=3000):
# Read the contents of the SRT file
with open(input_file, 'r', encoding='utf-8') as file:
content = file.read()
file.close()
# Split the content into individual subtitles
subtitles = content.strip().split('\n\n')
# Prepare to write the split files
output_files = []
current_file_content = ''
current_file_index = 1
for subtitle in subtitles:
# Check if adding this subtitle would exceed the character limit
if len(current_file_content) + len(subtitle) + 2 > max_chars: # +2 for \n\n
# Write the current file
output_file_name = f'split_{current_file_index}.srt'
with open(output_file_name, 'w', encoding='utf-8') as output_file:
output_file.write(current_file_content.strip())
output_files.append(output_file_name)
# Prepare for the next file
current_file_index += 1
current_file_content = subtitle + '\n\n'
else:
# If it fits, add the subtitle
current_file_content += subtitle + '\n\n'
# Write any remaining content to a new SRT file
if current_file_content:
output_file_name = f'split_{current_file_index}.srt'
with open(output_file_name, 'w', encoding='utf-8') as output_file:
output_file.write(current_file_content.strip())
output_files.append(output_file_name)
return output_files
def transcribe(mp3_file):
print("transcribe")
wordlevel_info=word_level_transcribe(mp3_file)
subtitles = create_subtitles(wordlevel_info)
subtitle_file = generate_subtitle_file('fa', subtitles, 'video_subtitled')
srt_list = split_srt_file(subtitle_file)
return srt_list |