# Scraped page header (not part of the program): author "nitiksh",
# commit 2447a0a (verified), message "Update app.py".
import os
import shutil
import tempfile
import threading
import uuid

import gradio as gr
import numpy as np
import pysrt
import speech_recognition as sr
import yt_dlp
from moviepy.editor import VideoFileClip
from PIL import Image, ImageDraw, ImageFont
from pydub import AudioSegment
# Directory that receives downloaded/uploaded videos and the files derived
# from them (extracted audio, .srt file, subtitled video).
UPLOAD_FOLDER = 'uploaded_videos'
# exist_ok=True replaces the check-then-create pair, which could fail if the
# directory appeared between the check and the makedirs call.
os.makedirs(UPLOAD_FOLDER, exist_ok=True)
def download_youtube_video(url, output_path=UPLOAD_FOLDER):
    """Download one video with yt-dlp under a random UUID-based file name.

    Args:
        url: Address of the video to fetch.
        output_path: Directory the file is written into.

    Returns:
        The saved file's name (without the directory part), or None when
        the download raised an exception.
    """
    unique_stem = str(uuid.uuid4())
    options = dict(
        outtmpl=f'{output_path}/{unique_stem}.%(ext)s',
        format='bestvideo+bestaudio/best',
        noplaylist=True,
    )
    try:
        with yt_dlp.YoutubeDL(options) as downloader:
            metadata = downloader.extract_info(url, download=True)
            # The extension is only known once yt-dlp has picked a format.
            saved_name = f"{unique_stem}.{metadata['ext']}"
            print(f"Downloaded: {metadata['title']}")
            print(f"Saved as: {saved_name}")
            return saved_name
    except Exception as error:
        # Best effort: report the problem and let the caller handle None.
        print(f"An error occurred: {error}")
        return None
def extract_audio_from_video(video_path, audio_path):
    """Write the audio track of a video file to ``audio_path``.

    Args:
        video_path: Path of the video to read.
        audio_path: Destination file for the extracted audio.

    Raises:
        ValueError: If the video has no audio track.
    """
    video_clip = VideoFileClip(video_path)
    try:
        if video_clip.audio is None:
            # Without this check the call below fails with an opaque
            # AttributeError on None.
            raise ValueError(f"No audio track found in {video_path}")
        video_clip.audio.write_audiofile(audio_path)
    finally:
        # Always release the resources held by the clip, even on failure.
        video_clip.close()
def transcribe_audio(audio_path, recognizer, language="en", chunk_length_ms=30000, retries=3):
    """Transcribe a WAV file chunk by chunk with ``recognizer.recognize_google``.

    The audio is cut into slices of ``chunk_length_ms``; each slice is
    exported to a temporary WAV file, recognised, and the results are joined
    with single spaces.

    Args:
        audio_path: Path of the WAV file to transcribe.
        recognizer: Object providing ``record`` and ``recognize_google``
            (the caller passes an ``sr.Recognizer``).
        language: Language code forwarded to ``recognize_google``.
        chunk_length_ms: Length of each slice.
        retries: Number of attempts per chunk when the request fails.

    Returns:
        The transcript. Chunks that cannot be understood contribute
        "[Unintelligible]"; chunks whose requests keep failing contribute
        "[Error: ...]".
    """
    audio = AudioSegment.from_wav(audio_path)
    pieces = []
    # A private temporary directory keeps concurrent calls from overwriting
    # each other's chunk files and is removed even if something raises.
    with tempfile.TemporaryDirectory() as chunk_dir:
        for index, start in enumerate(range(0, len(audio), chunk_length_ms)):
            chunk_path = os.path.join(chunk_dir, f"chunk_{index}.wav")
            audio[start:start + chunk_length_ms].export(chunk_path, format="wav")
            try:
                with sr.AudioFile(chunk_path) as source:
                    audio_data = recognizer.record(source)
            finally:
                # Delete each chunk right away so at most one is on disk.
                os.remove(chunk_path)

            for attempt in range(1, retries + 1):
                try:
                    text = recognizer.recognize_google(audio_data, language=language)
                except sr.UnknownValueError:
                    pieces.append("[Unintelligible]")
                    break
                except sr.RequestError as e:
                    if attempt >= retries:
                        pieces.append(f"[Error: {e}]")
                        break
                    print(f"Retrying transcription... (Attempt {attempt}/{retries})")
                else:
                    pieces.append(text)
                    break
    return " ".join(pieces).strip()
def split_text_into_segments(text, total_duration, segment_duration=2):
    """Split text into word groups, at most one per subtitle time slot.

    Args:
        text: Transcript to split on whitespace.
        total_duration: Length of the video, in the same unit as
            ``segment_duration``.
        segment_duration: Length of one subtitle slot.

    Returns:
        A list of space-joined word groups; empty when ``text`` has no words.
    """
    words = text.split()
    if not words:
        return []
    # A video shorter than one slot still gets one slot; without the max()
    # the division below would be by zero.
    slots = max(1, int(total_duration // segment_duration))
    # Ceiling division: rounding down would produce more segments than
    # slots, pushing the last subtitles past the end of the video.
    words_per_segment = max(1, -(-len(words) // slots))
    return [
        ' '.join(words[start:start + words_per_segment])
        for start in range(0, len(words), words_per_segment)
    ]
def generate_srt(segments, total_duration, segment_duration=2):
    """Render segments as SRT text, one fixed-length time slot per segment.

    Args:
        segments: Subtitle texts in display order.
        total_duration: Video length in seconds; cue end times are capped
            at this value.
        segment_duration: Seconds allotted to each cue.

    Returns:
        The SRT document as a single string.
    """
    limit_ms = total_duration * 1000
    rows = []
    for position, caption in enumerate(segments):
        begin_ms = position * segment_duration * 1000
        finish_ms = min((position + 1) * segment_duration * 1000, limit_ms)
        rows.extend((
            f"{position + 1}",
            f"{format_time(begin_ms)} --> {format_time(finish_ms)}",
            # The trailing newline becomes the blank line between cues.
            f"{caption}\n",
        ))
    return "\n".join(rows)
def format_time(milliseconds):
    """Format a millisecond count as an SRT timestamp ``HH:MM:SS,mmm``."""
    whole_seconds, millis = divmod(milliseconds, 1000)
    whole_minutes, secs = divmod(whole_seconds, 60)
    hrs, mins = divmod(whole_minutes, 60)
    return "{:02}:{:02}:{:02},{:03}".format(hrs, mins, secs, millis)
def wrap_text(draw, text, font, max_width):
    """Greedily wrap text into lines no wider than ``max_width``.

    Args:
        draw: Object whose ``textbbox((0, 0), text, font=font)`` returns a
            box; index 2 of that box is used as the rendered width.
        text: Text to wrap; it is split on whitespace.
        font: Font forwarded to ``draw.textbbox``.
        max_width: Maximum allowed line width.

    Returns:
        The wrapped lines; empty when ``text`` has no words. A single word
        wider than ``max_width`` is placed on a line of its own.
    """
    lines = []
    current = ''
    for word in text.split():
        # Measure the line as it will be drawn, including the joining space.
        candidate = f"{current} {word}" if current else word
        # An empty line always takes the word, otherwise an over-wide word
        # could never be placed and wrapping would not terminate.
        if not current or draw.textbbox((0, 0), candidate, font=font)[2] <= max_width:
            current = candidate
        else:
            lines.append(current)
            current = word
    if current:
        lines.append(current)
    return lines
def add_text_to_frame(frame, text, font_path):
    """Draw wrapped, centred subtitle text on a video frame.

    Each line is drawn in white on a dark grey box; the block of lines is
    placed so that it ends at 90% of the frame height.

    Args:
        frame: Image array accepted by ``Image.fromarray``.
        text: Subtitle text; it is wrapped to 90% of the frame width.
        font_path: Path of the TrueType font file to load.

    Returns:
        A new array with the text drawn, or ``frame`` itself when there is
        no text to draw.
    """
    if not text or not text.strip():
        # Nothing to draw; indexing the first line below would fail.
        return frame

    image = Image.fromarray(frame)
    draw = ImageDraw.Draw(image)
    width, height = image.size
    font_size = 40
    padding = 20
    font = ImageFont.truetype(font_path, font_size)

    lines = wrap_text(draw, text, font, width * 0.9)
    if not lines:
        return frame

    first_box = draw.textbbox((0, 0), lines[0], font=font)
    line_height = first_box[3] - first_box[1]
    # Use the same step for the height estimate and for advancing between
    # lines, so the block really ends at the 90% mark.
    line_step = line_height + padding
    y = height * 0.9 - line_step * len(lines)

    for line in lines:
        left, top, right, bottom = draw.textbbox((0, 0), line, font=font)
        text_width = right - left
        text_height = bottom - top
        x = (width - text_width) / 2
        draw.rectangle(
            [x - padding, y, x + text_width + padding, y + text_height + padding],
            fill=(53, 53, 53),
        )
        draw.text((x, y), line, font=font, fill="white")
        y += line_step
    return np.array(image)
def add_subtitles_to_video(video_path, srt_path, font_path, output_path):
    """Burn the cues of an SRT file into a video and write the result.

    Args:
        video_path: Path of the source video.
        srt_path: Path of the SRT file to read with pysrt.
        font_path: Font file forwarded to ``add_text_to_frame``.
        output_path: Destination of the subtitled video (libx264 / aac).
    """
    video = VideoFileClip(video_path)
    try:
        # Convert cue times to seconds once instead of for every frame.
        # Latest cue first: at a timestamp shared by two adjacent cues the
        # newer one wins, rather than both being drawn over each other.
        cues = [
            (sub.start.ordinal / 1000, sub.end.ordinal / 1000, sub.text)
            for sub in reversed(pysrt.open(srt_path))
        ]

        def process_frame(get_frame, t):
            frame = get_frame(t)
            for start, end, caption in cues:
                if start <= t <= end:
                    return add_text_to_frame(frame, caption, font_path)
            return frame

        new_video = video.fl(process_frame)
        new_video.write_videofile(output_path, codec="libx264", audio_codec="aac")
    finally:
        # Release the source clip's resources even if encoding fails.
        video.close()
def _save_uploaded_video(video_file):
    """Store an uploaded video in UPLOAD_FOLDER and return its file name."""
    # NOTE(review): assumes the upload arrives either as a path string or as
    # an object whose ``name`` is a path - confirm against the Gradio version.
    if isinstance(video_file, (str, os.PathLike)):
        source_path = os.fspath(video_file)
    else:
        source_path = video_file.name
    # basename(): the name may be a full path or client-controlled, and must
    # not add directory components to the destination.
    stored_name = str(uuid.uuid4()) + '_' + os.path.basename(source_path)
    destination = os.path.join(UPLOAD_FOLDER, stored_name)
    if os.path.isfile(source_path):
        shutil.copyfile(source_path, destination)
    else:
        # Fall back to the file-like interface when no file exists on disk.
        with open(destination, 'wb') as f:
            f.write(video_file.read())
    return stored_name


def create_subtitles(youtube_link=None, video_file=None):
    """Create a subtitled copy of a YouTube video or an uploaded video.

    The video is stored in UPLOAD_FOLDER, its audio is transcribed, the
    transcript is spread over fixed-length cues in an SRT file, and the cues
    are burned into a new ``*_with_subtitles.mp4`` file.

    Args:
        youtube_link: Video URL; takes precedence over ``video_file``.
        video_file: Uploaded file, used when no link is given.

    Returns:
        ``(message, path)``: a status message and the path of the subtitled
        video. ``path`` is None when no video was obtained, so the caller
        never receives an empty string in place of a file path.
    """
    video_filename = ''
    response_message = ''
    video_path = ''

    if youtube_link:
        video_filename = download_youtube_video(youtube_link)
        if video_filename:
            response_message = f"YouTube video saved successfully as {video_filename}."
            video_path = os.path.join(UPLOAD_FOLDER, video_filename)
        else:
            response_message = "Failed to download YouTube video."
    elif video_file:
        video_filename = _save_uploaded_video(video_file)
        video_path = os.path.join(UPLOAD_FOLDER, video_filename)
        response_message = f"File saved successfully as {video_filename}."

    if not video_path:
        return response_message, None

    # All derived files share the video's name without its extension.
    base_path = os.path.join(UPLOAD_FOLDER, video_filename.rsplit('.', 1)[0])

    audio_path = base_path + '.wav'
    extract_audio_from_video(video_path, audio_path)
    recognizer = sr.Recognizer()
    transcription = transcribe_audio(audio_path, recognizer)

    # Open the clip only to read its duration, then release it.
    clip = VideoFileClip(video_path)
    try:
        video_duration = int(clip.duration)
    finally:
        clip.close()

    segments = split_text_into_segments(transcription, video_duration)
    srt_content = generate_srt(segments, video_duration)
    srt_path = base_path + '.srt'
    # Explicit UTF-8 keeps non-ASCII transcripts from depending on the
    # platform's default encoding.
    with open(srt_path, 'w', encoding='utf-8') as srt_file:
        srt_file.write(srt_content)

    font_path = "KdamThmorPro-Regular.ttf"
    video_with_subtitles_path = base_path + '_with_subtitles.mp4'
    add_subtitles_to_video(video_path, srt_path, font_path, video_with_subtitles_path)
    return response_message, video_with_subtitles_path
def gradio_interface(youtube_link=None, video_file=None):
    """Click handler: run create_subtitles and return (message, video path)."""
    outcome = create_subtitles(youtube_link=youtube_link, video_file=video_file)
    status_text, subtitled_video = outcome
    return status_text, subtitled_video
# Build the Gradio UI: a link box and a file upload as inputs, one button,
# and a status box plus a video player as outputs.
with gr.Blocks() as app:
    gr.Markdown("# Video Subtitle Creator")
    with gr.Row():
        youtube_link = gr.Textbox(label="YouTube Link")
        video_file = gr.File(label="Upload Video File")
    with gr.Row():
        submit_btn = gr.Button("Create Subtitles")
    # NOTE(review): the original indentation was lost; the two output
    # components are assumed to sit below the button row - confirm.
    response_message = gr.Textbox(label="Response", interactive=False)
    video_player = gr.Video(label="Video with Subtitles", interactive=False)
    # A click calls gradio_interface with both inputs and routes its two
    # return values to the status box and the video player.
    submit_btn.click(
        gradio_interface,
        inputs=[youtube_link, video_file],
        outputs=[response_message, video_player]
    )
# Launch the app when this module is executed.
app.launch()