File size: 11,490 Bytes
23d4cfa
 
 
 
0b484f3
23d4cfa
 
 
 
 
 
7b82c63
23d4cfa
8bec4e8
1c45af0
3e04bc5
8fc9419
11c4b73
a02cdd0
23d4cfa
 
 
 
8bec4e8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23d4cfa
16b8b2c
 
 
 
 
23d4cfa
16b8b2c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23d4cfa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16b8b2c
 
23d4cfa
 
16b8b2c
23d4cfa
 
 
 
 
 
e9c5837
462a507
e9c5837
2ae1d12
 
e9c5837
 
16b8b2c
 
 
 
 
 
 
 
 
 
 
 
 
c980ffd
 
 
c4df0b5
c980ffd
911b39a
 
 
 
 
 
 
 
 
 
 
c980ffd
 
 
 
 
 
 
 
16b8b2c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c980ffd
16b8b2c
 
 
 
 
 
 
 
 
c980ffd
16b8b2c
 
 
23d4cfa
 
 
 
 
16b8b2c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23d4cfa
 
 
 
a05e96e
23d4cfa
16b8b2c
23d4cfa
97d83d9
 
23d4cfa
 
16b8b2c
da7713e
39186e9
 
16b8b2c
 
23d4cfa
58f3a48
23d4cfa
6469880
16b8b2c
 
 
 
 
23d4cfa
f834df3
d74463a
f834df3
23d4cfa
16b8b2c
 
90857ea
16b8b2c
2f8afe5
16b8b2c
d5458fa
1584125
23d4cfa
16b8b2c
23d4cfa
16b8b2c
 
 
 
 
 
 
 
 
 
 
 
0aa9031
16b8b2c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
from pytubefix import YouTube
from pytubefix.cli import on_progress
import time
import math
import gradio as gr
import ffmpeg
from faster_whisper import WhisperModel
import requests
import json
import arabic_reshaper # pip install arabic-reshaper
from bidi.algorithm import get_display # pip install python-bidi
from moviepy import *
import pysrt
import instaloader
import time
import concurrent.futures
import re
from io import BytesIO
from PIL import Image
import os

# SECURITY: an API token is committed in source control. Supply the token via
# the ONE_API_TOKEN environment variable instead; the literal below is kept
# only as a backward-compatible fallback and should be rotated and removed.
api_key = os.environ.get("ONE_API_TOKEN", "268976:66f4f58a2a905")


def extract_audio(input_video_name):
    """Extract the audio track of a video into ``audio.mp3``.

    Args:
        input_video_name: Path of the video file to read.

    Returns:
        The name of the written MP3 file (always ``"audio.mp3"``).

    Raises:
        ValueError: If the video has no audio track.
    """
    mp3_file = "audio.mp3"

    video_clip = VideoFileClip(input_video_name)
    audio_clip = None
    try:
        audio_clip = video_clip.audio
        if audio_clip is None:
            # Without this check the failure would be an opaque AttributeError.
            raise ValueError(f"No audio track found in {input_video_name!r}")
        audio_clip.write_audiofile(mp3_file)
    finally:
        # Release the clips even when writing fails, so their underlying
        # readers are not leaked.
        if audio_clip is not None:
            audio_clip.close()
        video_clip.close()

    print("Audio extraction successful!")
    return mp3_file

def transcribe(audio, max_segment_duration=2.0):
    """Transcribe an audio file and return word-level timestamps.

    Args:
        audio: Audio input passed straight to ``WhisperModel.transcribe``.
        max_segment_duration: Accepted for interface compatibility; it is
            not used by this function.

    Returns:
        A list of dicts with the keys ``'word'``, ``'start'`` and ``'end'``,
        one per recognised word, in transcription order.
    """
    whisper = WhisperModel("tiny", device="cpu")
    vad_options = {"min_silence_duration_ms": 1500}
    lazy_segments, _info = whisper.transcribe(
        audio,
        vad_filter=True,
        vad_parameters=vad_options,
        word_timestamps=True,
    )

    # Materialising the result is what actually runs the transcription.
    all_segments = list(lazy_segments)

    wordlevel_info = []
    for seg in all_segments:
        for token in seg.words:
            print(f"[{token.start:.2f}s -> {token.end:.2f}s] {token.word}")
            entry = {'word': token.word, 'start': token.start, 'end': token.end}
            wordlevel_info.append(entry)
    return wordlevel_info

def create_subtitles(wordlevel_info):
    """Group word-level timestamps into short subtitle segments.

    A segment is closed as soon as a word ends with a punctuation mark or
    the segment holds five words, whichever happens first. Afterwards each
    segment's end time is moved to the start of the following segment so
    that no gaps remain between consecutive subtitles.

    Args:
        wordlevel_info: Iterable of dicts with the keys ``'word'``,
            ``'start'`` and ``'end'``.

    Returns:
        A list of dicts with the keys ``'word'`` (segment text),
        ``'start'``, ``'end'`` and ``'textcontents'`` (the word dicts that
        make up the segment). Empty input yields an empty list.
    """
    # The original set listed ASCII '!' and '?' twice right after the CJK
    # full stop; the fullwidth forms were presumably intended, so they are
    # included explicitly here.
    punctuation_marks = {
        '.', '!', '?', ',', ';', ':', '—', '-', '。', '\uff01', '\uff1f',
    }
    max_words_per_line = 5

    def build_segment(words):
        # Words may carry surrounding whitespace (e.g. a leading space);
        # strip each one so the joined text has single spaces only.
        cleaned = (item["word"].strip() for item in words)
        return {
            "word": " ".join(piece for piece in cleaned if piece),
            "start": words[0]["start"],
            "end": words[-1]["end"],
            "textcontents": list(words),
        }

    subtitles = []
    line = []
    for word_data in wordlevel_info:
        line.append(word_data)
        stripped = word_data['word'].strip()
        ends_with_punct = bool(stripped) and stripped[-1] in punctuation_marks
        if ends_with_punct or len(line) == max_words_per_line:
            subtitles.append(build_segment(line))
            line = []

    # Flush the words left over after the last punctuation mark.
    if line:
        subtitles.append(build_segment(line))

    # Close gaps: every segment lasts until the next one starts.
    for previous, current in zip(subtitles, subtitles[1:]):
        previous["end"] = current["start"]

    return subtitles

def format_time(seconds):
    """Format a duration in seconds as an SRT timestamp ``HH:MM:SS,mmm``.

    The value is rounded to whole milliseconds first and then split into
    fields, so a fraction that rounds up carries into the seconds field
    instead of producing a four-digit millisecond part.

    Args:
        seconds: Non-negative duration in seconds (int or float). Negative
            values are clamped to zero.

    Returns:
        The timestamp string with every field zero-padded to SRT width.
    """
    total_ms = max(0, int(round(float(seconds) * 1000)))
    hours, remainder = divmod(total_ms, 3600000)
    minutes, remainder = divmod(remainder, 60000)
    whole_seconds, milliseconds = divmod(remainder, 1000)
    return f"{hours:02d}:{minutes:02d}:{whole_seconds:02d},{milliseconds:03d}"

def generate_subtitle_file(language, segments, input_video_name):
    """Write subtitle segments to an SRT file and return its name.

    Args:
        language: Language code embedded in the output file name.
        segments: Sequence of dicts with the keys ``'start'``, ``'end'``
            (seconds) and ``'word'`` (subtitle text).
        input_video_name: Base name embedded in the output file name.

    Returns:
        The name of the written file,
        ``sub-<input_video_name>.<language>.srt``.
    """
    subtitle_file = f"sub-{input_video_name}.{language}.srt"

    # Each entry is: index line, timing line, text line, blank separator.
    # The trailing space before each newline matches the established output.
    entries = [
        f"{index} \n"
        f"{format_time(segment['start'])} --> {format_time(segment['end'])} \n"
        f"{segment['word']} \n"
        "\n"
        for index, segment in enumerate(segments, start=1)
    ]

    # The context manager closes the file even if the write fails.
    with open(subtitle_file, "w", encoding='utf8') as f:
        f.write("".join(entries))
    return subtitle_file

def clean_text(text):
    """Strip Markdown code-fence residue from a text reply.

    Removes a triple-backtick fence at the very start and at the very end
    of the whole string, then removes a literal ``srt`` found at the start
    of any line.

    Args:
        text: The raw reply text.

    Returns:
        The text with the fence markers and line-leading ``srt`` removed.
    """
    # Not MULTILINE: only the outermost edges of the string are matched.
    fence_at_edges = re.compile(r"^```|```$")
    # MULTILINE: matches at the start of every line.
    srt_at_line_start = re.compile(r'^srt', flags=re.MULTILINE)

    without_fences = fence_at_edges.sub('', text)
    return srt_at_line_start.sub('', without_fences)

def translate_text(api_key, text, source_language = "en", target_language = "fa"):
    """Translate text through the one-api.ir Google translate endpoint.

    Args:
        api_key: Token sent in the ``one-api-token`` header.
        text: The text to translate.
        source_language: Source language code sent as ``source``.
        target_language: Target language code sent as ``target``.

    Returns:
        The ``'result'`` field of the JSON response when the HTTP status is
        200, otherwise ``None`` (after printing the error).
    """
    url = "https://api.one-api.ir/translate/v1/google/"
    # The parameters are named source_language/target_language; the previous
    # body referenced undefined names and raised NameError on every call.
    request_body = {"source": source_language, "target": target_language, "text": text}
    headers = {"one-api-token": api_key, "Content-Type": "application/json"}
    response = requests.post(url, headers=headers, json=request_body)
    if response.status_code == 200:
        result = response.json()
        return result['result']

    print(f"Error: {response.status_code}, {response.text}")
    return None

def enhance_text(api_key, text):
    """Translate an English SRT text to Persian through the chatbot endpoint.

    The request is attempted up to three times with a 30 second pause
    between attempts. If no attempt succeeds, the text is passed to
    ``translate_text`` as a fallback.

    Args:
        api_key: Token sent in the ``one-api-token`` header.
        text: The SRT subtitle text to translate.

    Returns:
        The cleaned chatbot reply on success; otherwise whatever
        ``translate_text`` returns (which may be ``None``).
    """
    url = "https://api.one-api.ir/chatbot/v1/gpt4o/"

    # Conversation: instruction, a canned acknowledgement, then the SRT text.
    request_body = [
        {
            "role": "user",
            "content": "Please take the following SRT subtitle text in English and translate only the subtitle text into Persian. Ensure that all numbering and time codes remain unchanged. The output should be a new SRT file with the subtitles in Persian, preserving the original formatting and timings and exept for the subtitle dont return anything in response. the subtitle will be provided in the following message",
        },
        {
            "role": "assistant",
            "content": "okay",
        },
        {
            "role": "user",
            "content": text,
        },
    ]

    headers = {
        "one-api-token": api_key,
        "Content-Type": "application/json",
    }

    max_attempts = 3
    retry_delay_seconds = 30

    for attempt in range(1, max_attempts + 1):
        response = requests.post(url, headers=headers, json=request_body)
        if response.status_code == 200:
            result = response.json()
            # .get() so a reply without "status" counts as a failed attempt
            # instead of raising KeyError.
            status = result.get("status")
            if status == 200:
                print("status: ", status)
                te = clean_text(result["result"][0])
                print("result: ", te)
                return te
            error = f"Error: status {status}"
        else:
            error = f"Error: {response.status_code}, {response.text}"

        # Only wait when another attempt follows; sleeping after the final
        # failure would just delay the fallback.
        if attempt < max_attempts:
            print(f"{error}, retrying in 30 seconds...")
            time.sleep(retry_delay_seconds)
        else:
            print(error)

    print("Error Max attempts reached. Could not retrieve a successful response.")
    te = translate_text(api_key, text)
    return te

def read_srt_file(file_path):
    """Read a UTF-8 text file and return its full content.

    Args:
        file_path: Path of the file to read.

    Returns:
        The file content as a string, or ``None`` when the file is missing
        or any other error occurs (a message is printed in both cases).
    """
    srt_content = None
    try:
        with open(file_path, encoding='utf-8') as srt_file:
            srt_content = srt_file.read()
    except FileNotFoundError:
        print(f"The file {file_path} was not found.")
    except Exception as e:
        print(f"An error occurred: {e}")
    return srt_content

def write_srt(subtitle_text, output_file="edited_srt.srt"):
    """Write subtitle text to a UTF-8 file, replacing any existing content.

    Args:
        subtitle_text: The text to write.
        output_file: Destination path.
    """
    srt_handle = open(output_file, 'w', encoding="utf-8")
    with srt_handle:
        srt_handle.write(subtitle_text)

def write_google(google_translate):
    """Save translated text to ``google_translate.srt`` and return that name.

    Args:
        google_translate: The text to write (UTF-8).

    Returns:
        The fixed output file name ``"google_translate.srt"``.
    """
    target_name = "google_translate.srt"
    out = open(target_name, 'w', encoding="utf-8")
    with out:
        out.write(google_translate)
    return target_name

def generate_translated_subtitle(language, segments, input_video_name):
    """Re-emit translated SRT text with a right-to-left mark on text lines.

    Empty lines are dropped and the remaining lines are treated as repeating
    groups of three. The first two lines of each group are copied unchanged;
    the third is prefixed with U+200F and followed by a blank line.

    Args:
        language: Not used by this function.
        segments: The translated SRT content as a single string.
        input_video_name: Name whose last ``/``-separated component becomes
            the output file's base name.

    Returns:
        The name of the written file, ``<base name>.srt``.
    """
    base_name = input_video_name.split('/')[-1]
    subtitle_file = f"{base_name}.srt"

    non_empty_rows = [row for row in segments.split('\n') if row != '']

    pieces = []
    for position, row in enumerate(non_empty_rows, start=1):
        if position % 3 == 0:
            # Third line of a group: RTL mark, then a blank separator line.
            pieces.append(f"\u200F{row}\n\n")
        else:
            pieces.append(f"{row}\n")

    with open(subtitle_file, "w", encoding='utf8') as out:
        out.write("".join(pieces))
    return subtitle_file

def time_to_seconds(time_obj):
    """Convert an object with time fields into a number of seconds.

    Args:
        time_obj: Object exposing ``hours``, ``minutes``, ``seconds`` and
            ``milliseconds`` attributes.

    Returns:
        The total duration in seconds, including the millisecond fraction.
    """
    whole_seconds = time_obj.hours * 3600 + time_obj.minutes * 60 + time_obj.seconds
    fraction = time_obj.milliseconds / 1000
    return whole_seconds + fraction

def create_subtitle_clips(subtitles, videosize, fontsize, font, color, debug):
    """Build text clips and tinted background clips for each subtitle.

    Every subtitle gets a caption box that is 80% of the video width and
    20% of the video height, horizontally centred and placed at 68% of the
    video height, plus a semi-transparent colour clip of the same size and
    position behind it.

    Args:
        subtitles: Iterable of subtitle items exposing ``start``, ``end``
            (accepted by ``time_to_seconds``) and ``text``.
        videosize: ``(width, height)`` of the video.
        fontsize: Font size passed to ``TextClip``.
        font: Font passed to ``TextClip``.
        color: Text colour passed to ``TextClip``.
        debug: Not used by this function.

    Returns:
        A tuple ``(subtitle_clips, color_clips)`` of two lists, in subtitle
        order.
    """
    # Geometry is the same for every subtitle, so compute it once.
    video_width, video_height = videosize
    box_size = (int(video_width * 0.8), int(video_height * 0.2))
    text_position = ('center', video_height * 0.68)

    subtitle_clips = []
    color_clips = []
    for subtitle in subtitles:
        # Timings are used exactly as given; no offset is applied.
        start_time = time_to_seconds(subtitle.start)
        end_time = time_to_seconds(subtitle.end)
        duration = end_time - start_time

        text_clip = TextClip(
            font,
            subtitle.text,
            font_size=fontsize,
            size=box_size,
            text_align="right",
            color=color,
            method='caption',
        ).with_start(start_time).with_duration(duration)

        # Translucent backdrop behind the caption box.
        backdrop = ColorClip(
            size=box_size,
            color=(225, 0, 0),
        ).with_opacity(0.2).with_start(start_time).with_duration(duration)

        subtitle_clips.append(text_clip.with_position(text_position))
        color_clips.append(backdrop.with_position(text_position))
    return subtitle_clips, color_clips

def video_edit(srt, input_video, input_audio= "audio.mp3"):
    """Burn subtitles into a video and write ``video_subtitled.mp4``.

    Args:
        srt: Path of the SRT file to render. When it does not name an
            existing file, ``"video_subtitled.srt"`` is used, which is the
            file this function always read before.
        input_video: Path of the source video.
        input_audio: Path of the audio file attached to the output.

    Returns:
        The name of the written video file, ``"video_subtitled.mp4"``.
    """
    import os  # local import: this name is not imported at file level

    default_subtitle_file = "video_subtitled.srt"
    if isinstance(srt, str) and os.path.isfile(srt):
        subtitle_path = srt
    else:
        subtitle_path = default_subtitle_file

    input_video_name = "video"
    output_video_file = input_video_name + '_subtitled' + ".mp4"

    video = VideoFileClip(input_video)
    audio = None
    try:
        audio = AudioFileClip(input_audio)
        video = video.with_audio(audio)
        print(video)

        subtitles = pysrt.open(subtitle_path, encoding="utf-8")
        subtitle_clips, color_clips = create_subtitle_clips(subtitles, video.size, 24, 'arial.ttf', 'white', False)

        # Layer order: video at the bottom, backdrops, then the text on top.
        final_video = CompositeVideoClip([video] + color_clips + subtitle_clips)
        final_video.write_videofile(output_video_file, codec="libx264", audio_codec="aac", logger=None)
    finally:
        # Release the media readers whether or not rendering succeeded.
        if audio is not None:
            audio.close()
        video.close()

    print('final')
    return output_video_file

def process_video(video, clip_type):
    """Run the full pipeline: transcribe, translate and subtitle a video.

    Steps: extract the audio, transcribe it to word timestamps, group the
    words into subtitle segments, write them to an SRT file, translate that
    SRT text, write the translated subtitle file and render it onto the
    video.

    Args:
        video: Path of the uploaded video file.
        clip_type: Value of the "Clip Type" dropdown; not used by this
            function.

    Returns:
        The name of the subtitled output video file.

    Raises:
        gr.Error: If the generated subtitle file cannot be read or the
            translation step returns no text.
    """
    mp3_file = extract_audio(video)
    wordlevel_info = transcribe(mp3_file)
    subtitles = create_subtitles(wordlevel_info)
    subtitle_file = generate_subtitle_file('fa', subtitles, 'video_subtitled')

    srt_string = read_srt_file(subtitle_file)
    if srt_string is None:
        raise gr.Error("Could not read the generated subtitle file.")

    google_translate = enhance_text(api_key, srt_string)
    if not google_translate:
        # Fail with a clear message instead of a TypeError further down
        # when the translated text is written to disk.
        raise gr.Error("Subtitle translation failed; no translated text was returned.")

    # Keep a copy of the raw translated text on disk.
    write_google(google_translate)

    sub = generate_translated_subtitle("fa", google_translate, "video_subtitled")
    output_video_file = video_edit(sub, video, input_audio=mp3_file)

    return output_video_file

# Gradio UI: one column with a video upload, a clip-type dropdown, a button
# and a result video. Components are created in the order they are laid out.
with gr.Blocks() as demo:
    gr.Markdown("Start typing below and then click **Run** to see the output.")
    with gr.Column():
        video_file_input = gr.Video(label="Upload Video File")
        clip_type = gr.Dropdown(["auto edit", "default"], label="Clip Type")
        btn = gr.Button("create")
        video_file_output = gr.Video(label="result: ")
        # Clicking the button runs process_video on the uploaded video and the
        # dropdown value, and shows the returned file in the result player.
        btn.click(fn=process_video, inputs=[video_file_input, clip_type], outputs=video_file_output)
# NOTE(review): the string literal below is a bare expression statement with no
# effect at runtime. It holds a disabled draft of a second UI row and refers to
# names (out, video_path_output, audio_path_output) that are not defined in the
# visible code.
"""    with gr.Row():
        vid_out = gr.Video()
        srt_file = gr.File()
        btn2 = gr.Button("transcribe")
        gr.on(
            triggers=[btn2.click],
            fn=write_google,
            inputs=out,
        ).then(video_edit, [out, video_path_output, audio_path_output], outputs=[vid_out, srt_file])"""


# Start the app at import/run time with debug mode enabled.
demo.launch(debug=True)