Spaces:

marquesafonso
/

multilang-asr-captioner

Running

App Files Files Community

multilang-asr-captioner / utils /transcriber.py

marquesafonso

fix srt writer and parser for standard time format (hh:mm:ss,ms)

4f772d6 over 1 year ago

raw

history blame

2.87 kB

	from faster_whisper import WhisperModel
	import logging

	# Send every log record (DEBUG and above) to main.log, UTF-8 encoded, with a
	# 12-hour AM/PM timestamp and the level name in front of each message.
	logging.basicConfig(
	    format="%(asctime)s %(levelname)s %(message)s",
	    datefmt="%m/%d/%Y %I:%M:%S %p",
	    level=logging.DEBUG,
	    filename="main.log",
	    encoding="utf-8",
	)
	# Explicitly set the "faster_whisper" logger to DEBUG as well.
	logging.getLogger("faster_whisper").setLevel(logging.DEBUG)

	def convert_seconds_to_time(seconds):
	    """Format a duration in seconds as an SRT timestamp (``hh:mm:ss,mmm``).

	    The value is rounded to the nearest millisecond *before* it is split into
	    fields. Truncating the fractional part instead loses a millisecond
	    whenever the float is stored slightly below its decimal value (``1.001``
	    would come out as ``00:00:01,000``), and rounding first also lets a value
	    such as ``59.9996`` carry cleanly into the next minute.

	    Args:
	        seconds: Duration in seconds; anything ``float()`` accepts.

	    Returns:
	        The timestamp string; hours are zero-padded to at least two digits.
	    """
	    # Work in integer milliseconds from here on so no float error can creep in.
	    total_ms = round(float(seconds) * 1000)
	    hours, rest = divmod(total_ms, 3_600_000)
	    minutes, rest = divmod(rest, 60_000)
	    whole_seconds, milliseconds = divmod(rest, 1000)
	    return f"{hours:02}:{minutes:02}:{whole_seconds:02},{milliseconds:03}"

	def write_srt(segments, srt_path, max_words_per_line):
	    """Write word-timed segments to ``srt_path`` as numbered SRT cues.

	    Each segment's words are grouped into cues of ``max_words_per_line`` words;
	    the last cue of a segment holds whatever words remain, so a cue never
	    spans two segments. A cue starts at its first word's ``start`` and ends at
	    its last word's ``end``. Cue numbers start at 1 and run across all
	    segments.

	    Args:
	        segments: Iterable of objects exposing ``words``, a sequence of items
	            with ``start``, ``end`` and ``word`` attributes.
	        srt_path: Destination file; overwritten, written as UTF-8.
	        max_words_per_line: Number of words that completes a cue.
	    """
	    with open(srt_path, "w", encoding='utf-8') as srt_file:
	        cue_number = 1
	        for segment in segments:
	            pending = []
	            for position, word in enumerate(segment.words):
	                pending.append(word)
	                # Keep collecting until the cue is full or the segment ends.
	                if len(pending) != max_words_per_line and position != len(segment.words) - 1:
	                    continue
	                cue_start = convert_seconds_to_time(pending[0].start)
	                cue_end = convert_seconds_to_time(pending[-1].end)
	                cue_text = ' '.join(item.word.strip() for item in pending)
	                srt_file.write(f"{cue_number}\n{cue_start} --> {cue_end}\n{cue_text}\n\n")
	                cue_number += 1
	                pending = []

	def transcriber(
	    input_path: str,
	    srt_path: str,
	    max_words_per_line: int,
	    task: str,
	) -> None:
	    """Run Whisper on ``input_path`` and write the result to ``srt_path`` as SRT.

	    Loads the ``large-v3`` model on CPU with int8 compute, runs it with
	    VAD filtering and word-level timestamps, logs the detected language, and
	    hands the segments to ``write_srt``.

	    Args:
	        input_path: Media file passed to ``WhisperModel.transcribe``.
	        srt_path: Where the SRT file is written.
	        max_words_per_line: Forwarded to ``write_srt`` to size each caption.
	        task: Forwarded unchanged as the ``task`` argument of
	            ``WhisperModel.transcribe``.
	    """
	    # TODO: model_size = "distil-large-v3" -> need to wait for new pypi version of faster-whisper (pull request already merged)
	    model_size = "large-v3"
	    # TODO: add condition_on_previous_text=False when using distil-whisper
	    model = WhisperModel(model_size, device="cpu", compute_type="int8")
	    segments, info = model.transcribe(
	        input_path,
	        beam_size=5,
	        task=task,
	        vad_filter=True,
	        vad_parameters={"min_silence_duration_ms": 500},
	        # write_srt reads segment.words, so word timestamps are requested.
	        word_timestamps=True,
	    )
	    # Pass the values as logging arguments so formatting is left to logging.
	    logging.info(
	        "Detected language '%s' with probability %f",
	        info.language,
	        info.language_probability,
	    )
	    write_srt(segments=segments, srt_path=srt_path, max_words_per_line=max_words_per_line)