Spaces:

nitiksh
/

SubtitleGenerator

Runtime error

App Files Files Community

SubtitleGenerator / app.py

nitiksh

Update app.py

2447a0a verified 8 months ago

raw

history blame contribute delete

8.3 kB

	import os
	import shutil
	import tempfile
	import threading
	import uuid

	import gradio as gr
	import numpy as np
	import pysrt
	import speech_recognition as sr
	import yt_dlp
	from moviepy.editor import VideoFileClip
	from PIL import Image, ImageDraw, ImageFont
	from pydub import AudioSegment

	# Directory that receives downloaded/uploaded videos and the files derived
	# from them (extracted audio, SRT, subtitled video).
	UPLOAD_FOLDER = 'uploaded_videos'
	# exist_ok=True avoids the check-then-create race of
	# "if not exists: makedirs" when two processes start at the same time.
	os.makedirs(UPLOAD_FOLDER, exist_ok=True)

	def download_youtube_video(url, output_path=UPLOAD_FOLDER):
	    """Download one video with yt-dlp and store it under a random name.

	    Args:
	        url: Address of the video to fetch.
	        output_path: Directory the downloaded file is written into.

	    Returns:
	        The stored file name (``<uuid4>.<ext>``, without the directory part),
	        or ``None`` if any exception is raised while downloading.
	    """
	    # A random stem keeps separate downloads from overwriting each other.
	    file_stem = str(uuid.uuid4())
	    options = dict(
	        outtmpl=f'{output_path}/{file_stem}.%(ext)s',
	        format='bestvideo+bestaudio/best',
	        noplaylist=True,
	    )

	    try:
	        with yt_dlp.YoutubeDL(options) as downloader:
	            metadata = downloader.extract_info(url, download=True)
	            saved_name = f"{file_stem}.{metadata['ext']}"
	            print(f"Downloaded: {metadata['title']}")
	            print(f"Saved as: {saved_name}")
	            return saved_name
	    except Exception as e:
	        # Best-effort boundary: report the problem and signal failure with None.
	        print(f"An error occurred: {e}")
	        return None

	def extract_audio_from_video(video_path, audio_path):
	    """Write the audio track of a video file to ``audio_path``.

	    Args:
	        video_path: Path of the source video.
	        audio_path: Path of the audio file to create.

	    Raises:
	        ValueError: If the video has no audio track.
	    """
	    video_clip = VideoFileClip(video_path)
	    try:
	        if video_clip.audio is None:
	            # Without this check the next line fails with an opaque
	            # AttributeError on None.
	            raise ValueError(f"No audio track found in {video_path!r}")
	        video_clip.audio.write_audiofile(audio_path)
	    finally:
	        # Release the readers held by the clip even when writing fails.
	        video_clip.close()

	def transcribe_audio(audio_path, recognizer, language="en", chunk_length_ms=30000, retries=3):
	    """Transcribe a WAV file chunk by chunk with ``recognizer.recognize_google``.

	    Args:
	        audio_path: Path of the WAV file to transcribe.
	        recognizer: ``speech_recognition.Recognizer`` used for every chunk.
	        language: Language code forwarded to ``recognize_google``.
	        chunk_length_ms: Length of each chunk in milliseconds.
	        retries: Maximum attempts per chunk when the request itself fails.

	    Returns:
	        The chunk transcripts joined by single spaces. A chunk that cannot be
	        understood contributes ``[Unintelligible]``; a chunk whose request
	        still fails on the last attempt contributes ``[Error: ...]``.

	    Raises:
	        ValueError: If ``chunk_length_ms`` is not positive.
	    """
	    if chunk_length_ms <= 0:
	        raise ValueError("chunk_length_ms must be positive")

	    audio = AudioSegment.from_wav(audio_path)
	    pieces = []

	    # A private temporary directory keeps concurrent calls from clobbering
	    # each other's chunk files and guarantees cleanup even if a chunk raises.
	    with tempfile.TemporaryDirectory() as chunk_dir:
	        for index, start in enumerate(range(0, len(audio), chunk_length_ms)):
	            chunk_path = os.path.join(chunk_dir, f"chunk_{index}.wav")
	            # export() hands back the open output file; close it right away so
	            # the handle does not linger until garbage collection.
	            audio[start:start + chunk_length_ms].export(chunk_path, format="wav").close()

	            with sr.AudioFile(chunk_path) as source:
	                audio_data = recognizer.record(source)

	            for attempt in range(1, retries + 1):
	                try:
	                    text = recognizer.recognize_google(audio_data, language=language)
	                except sr.UnknownValueError:
	                    pieces.append("[Unintelligible]")
	                    break
	                except sr.RequestError as e:
	                    if attempt >= retries:
	                        pieces.append(f"[Error: {e}]")
	                    else:
	                        print(f"Retrying transcription... (Attempt {attempt}/{retries})")
	                else:
	                    pieces.append(text)
	                    break

	    return " ".join(pieces).strip()

	def split_text_into_segments(text, total_duration, segment_duration=2):
	    """Split ``text`` into word groups, one per subtitle time slot.

	    The number of slots is ``total_duration // segment_duration`` (at least
	    one). Words are spread so that the number of segments never exceeds the
	    number of slots, which keeps every segment inside the video's duration.

	    Args:
	        text: Transcript to split on whitespace.
	        total_duration: Length of the video, in the same unit as
	            ``segment_duration``.
	        segment_duration: Length of one subtitle slot.

	    Returns:
	        A list of space-joined word groups, in original order. Empty when
	        ``text`` contains no words.
	    """
	    words = text.split()
	    if not words:
	        return []

	    # A video shorter than one slot used to give 0 slots and a
	    # ZeroDivisionError; treat it as a single slot instead.
	    slots = max(1, int(total_duration // segment_duration))
	    # Ceiling division: rounding down produced more segments than slots, so
	    # the surplus segments were timed past the end of the video.
	    words_per_segment = -(-len(words) // slots)

	    return [
	        ' '.join(words[start:start + words_per_segment])
	        for start in range(0, len(words), words_per_segment)
	    ]

	def generate_srt(segments, total_duration, segment_duration=2):
	    """Render text segments as the contents of an SRT subtitle file.

	    Each segment normally occupies ``segment_duration`` seconds, one after
	    another from time zero. When that would run past ``total_duration`` the
	    segments are instead spread evenly over the whole duration, so no cue
	    ever starts after it ends.

	    Args:
	        segments: Subtitle texts in display order.
	        total_duration: Length of the video in seconds.
	        segment_duration: Nominal display time of one segment in seconds.

	    Returns:
	        The SRT text: for every segment a 1-based index line, a
	        ``start --> end`` line and the text followed by a blank line.
	    """
	    count = len(segments)
	    # Work in whole milliseconds so the time formatter only ever sees ints.
	    total_ms = int(total_duration * 1000)
	    step_ms = int(segment_duration * 1000)
	    # Only the end time used to be clamped, so surplus segments got
	    # start > end. Compress the timeline instead when it would overflow.
	    compress = count * step_ms > total_ms

	    entries = []
	    for index, segment in enumerate(segments):
	        if compress:
	            start_ms = index * total_ms // count
	            end_ms = (index + 1) * total_ms // count
	        else:
	            start_ms = index * step_ms
	            end_ms = (index + 1) * step_ms
	        entries.append(f"{index + 1}")
	        entries.append(f"{format_time(start_ms)} --> {format_time(end_ms)}")
	        entries.append(f"{segment}\n")

	    return "\n".join(entries)

	def format_time(milliseconds):
	    """Format a millisecond offset as an SRT timestamp ``HH:MM:SS,mmm``.

	    Args:
	        milliseconds: Offset from the start in milliseconds. Floats are
	            rounded to the nearest millisecond; negative values are treated
	            as zero because SRT cannot express them.

	    Returns:
	        The zero-padded timestamp string.
	    """
	    # Without the int conversion a float input renders as e.g.
	    # "0.0:0.0:3.0,700.0", which is not a valid SRT timestamp.
	    total_ms = max(0, int(round(milliseconds)))
	    seconds, millis = divmod(total_ms, 1000)
	    minutes, seconds = divmod(seconds, 60)
	    hours, minutes = divmod(minutes, 60)
	    return f"{hours:02}:{minutes:02}:{seconds:02},{millis:03}"

	def wrap_text(draw, text, font, max_width):
	    """Greedily wrap ``text`` into lines no wider than ``max_width``.

	    Args:
	        draw: Object providing ``textbbox((x, y), text, font=...)``; the
	            third element of the returned box is used as the rendered width.
	        text: Text to wrap; it is split on whitespace.
	        font: Font passed through to ``draw.textbbox``.
	        max_width: Maximum allowed line width.

	    Returns:
	        The wrapped lines in order. A single word wider than ``max_width``
	        is placed on a line of its own rather than dropped. Empty when
	        ``text`` contains no words.
	    """
	    lines = []
	    current = ''
	    for word in text.split():
	        # Measure the line exactly as it will be drawn, including the joining
	        # space that the width check previously left out.
	        candidate = f"{current} {word}" if current else word
	        if current and draw.textbbox((0, 0), candidate, font=font)[2] > max_width:
	            lines.append(current)
	            current = word
	        else:
	            # Either the word fits, or it is the first word on the line. An
	            # oversized first word is accepted as-is; refusing it used to
	            # spin forever appending empty lines.
	            current = candidate
	    if current:
	        lines.append(current)
	    return lines

	def add_text_to_frame(frame, text, font_path):
	    """Draw ``text`` as a centred subtitle near the bottom of a video frame.

	    The text is wrapped to 90% of the frame width and drawn in white on dark
	    grey boxes, with the block positioned above the line at 90% of the frame
	    height.

	    Args:
	        frame: Image data accepted by ``PIL.Image.fromarray``.
	        text: Subtitle text to render.
	        font_path: Path of the TrueType font file to use.

	    Returns:
	        A new NumPy array holding the frame with the subtitle drawn on it.
	        When ``text`` has no words the frame content is returned unchanged.
	    """
	    image = Image.fromarray(frame)
	    draw = ImageDraw.Draw(image)
	    width, height = image.size

	    font_size = 40
	    padding = 20
	    font = ImageFont.truetype(font_path, font_size)

	    lines = wrap_text(draw, text, font, width * 0.9)
	    if not lines:
	        # Nothing to draw; indexing lines[0] below would raise IndexError.
	        return np.array(image)

	    first_box = draw.textbbox((0, 0), lines[0], font=font)
	    line_height = first_box[3] - first_box[1]
	    # Size the block with the same per-line advance the loop uses. The block
	    # height previously assumed a 10 px gap while the loop advanced by 20 px,
	    # pushing multi-line subtitles below their anchor.
	    line_advance = line_height + padding
	    y = height * 0.9 - line_advance * len(lines)

	    for line in lines:
	        left, top, right, bottom = draw.textbbox((0, 0), line, font=font)
	        text_width = right - left
	        text_height = bottom - top
	        x = (width - text_width) / 2

	        # Background box: padded horizontally and at the bottom.
	        draw.rectangle(
	            [x - padding, y, x + text_width + padding, y + text_height + padding],
	            fill=(53, 53, 53),
	        )
	        draw.text((x, y), line, font=font, fill="white")
	        y += line_advance

	    return np.array(image)

	def add_subtitles_to_video(video_path, srt_path, font_path, output_path):
	    """Burn the subtitles of an SRT file into a video and write the result.

	    Args:
	        video_path: Path of the source video.
	        srt_path: Path of the SRT file to read.
	        font_path: Path of the TrueType font used for the subtitle text.
	        output_path: Path of the video to write (libx264 video, AAC audio).
	    """
	    video = VideoFileClip(video_path)
	    try:
	        # Convert cue times to seconds once instead of once per frame, and
	        # skip cues with no visible text.
	        cues = [
	            (sub.start.ordinal / 1000, sub.end.ordinal / 1000, sub.text)
	            for sub in pysrt.open(srt_path)
	            if sub.text.strip()
	        ]

	        def process_frame(get_frame, t):
	            frame = get_frame(t)
	            for start, end, text in cues:
	                # Half-open interval: on the boundary between two consecutive
	                # cues only the later one is drawn, not both on top of each
	                # other.
	                if start <= t < end:
	                    return add_text_to_frame(frame, text, font_path)
	            return frame

	        new_video = video.fl(process_frame)
	        new_video.write_videofile(output_path, codec="libx264", audio_codec="aac")
	    finally:
	        # Release the readers held by the source clip even if encoding fails.
	        video.close()

	def _store_uploaded_video(video_file):
	    """Copy an uploaded video into UPLOAD_FOLDER and return its new file name."""
	    # NOTE(review): depending on the Gradio version the upload arrives either
	    # as a path string or as an object whose ``name`` is the path of a
	    # temporary file - confirm against the deployed Gradio release.
	    if isinstance(video_file, str):
	        source_path = video_file
	    else:
	        source_path = getattr(video_file, 'name', '') or ''

	    # Keep only the base name: ``name`` may be a full path, which would
	    # otherwise point the destination at directories that do not exist (or
	    # outside UPLOAD_FOLDER).
	    stored_name = f"{uuid.uuid4()}_{os.path.basename(source_path)}"
	    destination = os.path.join(UPLOAD_FOLDER, stored_name)

	    if os.path.isfile(source_path):
	        shutil.copyfile(source_path, destination)
	    else:
	        # Fall back to the original behaviour for plain file-like objects.
	        with open(destination, 'wb') as f:
	            f.write(video_file.read())
	    return stored_name


	def create_subtitles(youtube_link=None, video_file=None):
	    """Create a subtitled copy of a YouTube video or an uploaded video.

	    The video is stored in UPLOAD_FOLDER, its audio is extracted and
	    transcribed, the transcript is written as an SRT file, and the subtitles
	    are rendered onto the video. ``youtube_link`` takes precedence when both
	    inputs are given.

	    Args:
	        youtube_link: URL of the video to download, or a falsy value.
	        video_file: Uploaded file, or a falsy value.

	    Returns:
	        A ``(message, path)`` tuple. ``message`` describes how the input was
	        stored (empty when no input was given). ``path`` is the subtitled
	        video, or ``None`` when no video was produced.
	    """
	    if youtube_link:
	        video_filename = download_youtube_video(youtube_link)
	        if not video_filename:
	            return "Failed to download YouTube video.", None
	        response_message = f"YouTube video saved successfully as {video_filename}."
	    elif video_file:
	        video_filename = _store_uploaded_video(video_file)
	        response_message = f"File saved successfully as {video_filename}."
	    else:
	        # None rather than '' so the caller is not handed an empty path that
	        # looks like a video file.
	        return '', None

	    video_path = os.path.join(UPLOAD_FOLDER, video_filename)
	    base_name = os.path.splitext(video_filename)[0]

	    audio_path = os.path.join(UPLOAD_FOLDER, base_name + '.wav')
	    extract_audio_from_video(video_path, audio_path)
	    recognizer = sr.Recognizer()
	    transcription = transcribe_audio(audio_path, recognizer)

	    # Close the clip opened only to read the duration.
	    clip = VideoFileClip(video_path)
	    try:
	        video_duration = int(clip.duration)
	    finally:
	        clip.close()

	    segments = split_text_into_segments(transcription, video_duration)
	    srt_content = generate_srt(segments, video_duration)
	    srt_path = os.path.join(UPLOAD_FOLDER, base_name + '.srt')

	    # Explicit UTF-8 so non-ASCII transcripts do not depend on the locale.
	    with open(srt_path, 'w', encoding='utf-8') as srt_file:
	        srt_file.write(srt_content)

	    font_path = "KdamThmorPro-Regular.ttf"
	    video_with_subtitles_path = os.path.join(UPLOAD_FOLDER, base_name + '_with_subtitles.mp4')
	    add_subtitles_to_video(video_path, srt_path, font_path, video_with_subtitles_path)

	    return response_message, video_with_subtitles_path

	def gradio_interface(youtube_link=None, video_file=None):
	    """Click handler: forward both inputs to ``create_subtitles``.

	    Returns:
	        The ``(message, video path)`` pair produced by ``create_subtitles``.
	    """
	    status_text, subtitled_video = create_subtitles(
	        youtube_link=youtube_link,
	        video_file=video_file,
	    )
	    return status_text, subtitled_video

	# UI: a YouTube link box and a file upload side by side, a submit button,
	# then the status message and the resulting video.
	with gr.Blocks() as app:
	    gr.Markdown("# Video Subtitle Creator")

	    with gr.Row():
	        youtube_link = gr.Textbox(label="YouTube Link")
	        video_file = gr.File(label="Upload Video File")

	    with gr.Row():
	        submit_btn = gr.Button("Create Subtitles")

	    response_message = gr.Textbox(label="Response", interactive=False)
	    video_player = gr.Video(label="Video with Subtitles", interactive=False)

	    submit_btn.click(
	        gradio_interface,
	        inputs=[youtube_link, video_file],
	        outputs=[response_message, video_player],
	    )

	# Start the server only when run as a script, so importing this module
	# (e.g. for tests) does not launch it.
	if __name__ == "__main__":
	    app.launch()