File size: 1,492 Bytes
b1d066d
 
 
 
 
 
 
 
de9fc46
b1d066d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
import gradio as gr
import os
# import sys
import subprocess

import whisper
from whisper.utils import write_vtt

# Load the Whisper "base" model once, at import time; transcribe() below uses
# this module-level object for every call instead of loading it per request.
model = whisper.load_model("base")

# Human-readable description of the app.
# NOTE(review): not referenced anywhere else in the visible file — presumably
# meant to be shown in the Gradio UI; confirm or remove.
title = "Add multilingual text/caption to your video"


def video2mp3(video_file, output_ext="mp3"):
    """Extract the audio of a media file into a sibling file using ffmpeg.

    The output is written next to the input, using the input's path with its
    extension replaced by ``output_ext``. An existing file at that path is
    overwritten (ffmpeg ``-y``). ffmpeg's console output is discarded.

    Args:
        video_file: Path to the input media file.
        output_ext: Extension (without the leading dot) of the file to create.

    Returns:
        Path of the audio file. If ``video_file`` already has exactly that
        path (it already carries ``output_ext``), it is returned unchanged
        and ffmpeg is not invoked.

    Raises:
        FileNotFoundError: If the ``ffmpeg`` executable cannot be found.
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
    """
    base_path, _ = os.path.splitext(video_file)
    audio_file = f"{base_path}.{output_ext}"

    # ffmpeg cannot write onto the file it is reading; when the input already
    # has the target extension there is nothing to convert.
    if audio_file == video_file:
        return audio_file

    # check=True: a failed conversion used to be ignored, so callers received
    # a path to a file that was never written. Fail loudly here instead.
    subprocess.run(
        ["ffmpeg", "-y", "-i", video_file, audio_file],
        stdout=subprocess.DEVNULL,
        stderr=subprocess.STDOUT,
        check=True,
    )
    return audio_file


def _escape_ffmpeg_filter_value(value):
    """Escape *value* so ffmpeg reads it literally as a filter option value."""
    # Level 1 (option value): backslash, single quote and colon are special.
    escaped = value.replace("\\", "\\\\").replace("'", "\\'").replace(":", "\\:")
    # Level 2 (filtergraph description): backslash must be doubled first so
    # the escapes added for the other characters are not escaped again.
    for char in "\\'[],;":
        escaped = escaped.replace(char, "\\" + char)
    return escaped


def transcribe(input_video):
    """Burn Whisper-generated subtitles into a video and return the new file.

    Steps: extract the audio with ``video2mp3``, transcribe it with the
    module-level ``model``, write the resulting segments as a WebVTT file
    next to the audio, then have ffmpeg render that file onto the video with
    its ``subtitles`` filter.

    Args:
        input_video: Path to the video file to caption.

    Returns:
        Path of the subtitled video: the audio file's path with its extension
        replaced by ``_subtitled.mp4``.

    Raises:
        ValueError: If ``input_video`` is empty or ``None``.
        FileNotFoundError: If the ``ffmpeg`` executable cannot be found.
        subprocess.CalledProcessError: If the subtitle-rendering ffmpeg run
            exits with a non-zero status.
    """
    if not input_video:
        raise ValueError("No input video was provided.")

    audio_file = video2mp3(input_video)

    result = model.transcribe(audio_file)

    # splitext only strips the final extension; splitting on the first "."
    # truncated any path with a dot in a directory name or a "./" prefix.
    base_path, _ = os.path.splitext(audio_file)
    subtitle = base_path + ".vtt"
    output_video = base_path + "_subtitled.mp4"

    # Transcripts may be in any language, so do not rely on the platform's
    # default text encoding.
    with open(subtitle, "w", encoding="utf-8") as vtt:
        write_vtt(result["segments"], file=vtt)

    # Argument list instead of a shell string: paths with spaces or shell
    # metacharacters are passed through verbatim. "-y" keeps ffmpeg from
    # stopping to ask before overwriting a previous result.
    subprocess.run(
        [
            "ffmpeg", "-y",
            "-i", input_video,
            "-vf", f"subtitles={_escape_ffmpeg_filter_value(subtitle)}",
            output_video,
        ],
        check=True,
    )

    return output_video


# Build the UI: an input video and an output video placed in the same Row,
# followed by a button that runs transcribe() on the input.
with gr.Blocks() as block:
    with gr.Group():
        with gr.Box():
            video_row = gr.Row().style()
            with video_row:
                inp_video = gr.Video(label="Input Video", type="filepath", mirror_webcam=False)
                op_video = gr.Video()
            btn = gr.Button("Generate Subtitle Video")

        # On click, pass the input video to transcribe() and show the
        # returned file in the output video component.
        btn.click(fn=transcribe, inputs=[inp_video], outputs=[op_video])

# Start the app with request queuing enabled.
block.launch(enable_queue=True)