File size: 11,490 Bytes
23d4cfa 0b484f3 23d4cfa 7b82c63 23d4cfa 8bec4e8 1c45af0 3e04bc5 8fc9419 11c4b73 a02cdd0 23d4cfa 8bec4e8 23d4cfa 16b8b2c 23d4cfa 16b8b2c 23d4cfa 16b8b2c 23d4cfa 16b8b2c 23d4cfa e9c5837 462a507 e9c5837 2ae1d12 e9c5837 16b8b2c c980ffd c4df0b5 c980ffd 911b39a c980ffd 16b8b2c c980ffd 16b8b2c c980ffd 16b8b2c 23d4cfa 16b8b2c 23d4cfa a05e96e 23d4cfa 16b8b2c 23d4cfa 97d83d9 23d4cfa 16b8b2c da7713e 39186e9 16b8b2c 23d4cfa 58f3a48 23d4cfa 6469880 16b8b2c 23d4cfa f834df3 d74463a f834df3 23d4cfa 16b8b2c 90857ea 16b8b2c 2f8afe5 16b8b2c d5458fa 1584125 23d4cfa 16b8b2c 23d4cfa 16b8b2c 0aa9031 16b8b2c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 |
from pytubefix import YouTube
from pytubefix.cli import on_progress
import time
import math
import gradio as gr
import ffmpeg
from faster_whisper import WhisperModel
import requests
import json
import arabic_reshaper # pip install arabic-reshaper
from bidi.algorithm import get_display # pip install python-bidi
from moviepy import *
import pysrt
import instaloader
import time
import concurrent.futures
import re
from io import BytesIO
from PIL import Image
# Token sent as the "one-api-token" header to api.one-api.ir by
# translate_text() and enhance_text().
# NOTE(review): this credential is hard-coded in source control; it should
# presumably be rotated and loaded from an environment variable or secret
# store instead -- confirm with the owner of the account.
api_key = "268976:66f4f58a2a905"
def extract_audio(input_video_name):
    """Extract the audio track of a video into ``audio.mp3``.

    Args:
        input_video_name: Path of the video file to read.

    Returns:
        The name of the written audio file (always ``"audio.mp3"``, relative
        to the current working directory).

    Raises:
        ValueError: If the video has no audio track.
    """
    mp3_file = "audio.mp3"
    video_clip = VideoFileClip(input_video_name)
    try:
        audio_clip = video_clip.audio
        if audio_clip is None:
            # Without this guard the failure is an opaque AttributeError
            # raised from ``None.write_audiofile``.
            raise ValueError(f"No audio track found in {input_video_name!r}")
        try:
            audio_clip.write_audiofile(mp3_file)
        finally:
            audio_clip.close()
    finally:
        # Release the video reader even when the export fails.
        video_clip.close()
    print("Audio extraction successful!")
    return mp3_file
def transcribe(audio, max_segment_duration=2.0):  # Set your desired max duration here
    """Transcribe an audio file and return word-level timing information.

    A ``tiny`` Whisper model is created on the CPU for every call, and the
    audio is transcribed with VAD filtering and word timestamps enabled.

    Args:
        audio: Audio input handed to ``WhisperModel.transcribe``.
        max_segment_duration: Accepted for compatibility; the body does not
            read it.

    Returns:
        A list of ``{'word', 'start', 'end'}`` dicts, one per recognised word,
        in transcription order.
    """
    whisper = WhisperModel("tiny", device="cpu")
    vad_options = dict(min_silence_duration_ms=1500)
    segment_iter, _info = whisper.transcribe(
        audio,
        vad_filter=True,
        vad_parameters=vad_options,
        word_timestamps=True,
    )
    # Materialise the segments first: consuming the iterator is what
    # actually runs the transcription.
    all_segments = list(segment_iter)

    words = []
    for seg in all_segments:
        for w in seg.words:
            print("[%.2fs -> %.2fs] %s" % (w.start, w.end, w.word))
            words.append({'word': w.word, 'start': w.start, 'end': w.end})
    return words
def _build_subtitle(line):
    """Collapse a non-empty list of word entries into one subtitle segment."""
    # Word tokens may carry their own leading whitespace; strip each one so
    # the joined text has exactly one space between words.
    cleaned = (item["word"].strip() for item in line)
    return {
        "word": " ".join(token for token in cleaned if token),
        "start": line[0]["start"],
        "end": line[-1]["end"],
        "textcontents": line.copy(),
    }


def create_subtitles(wordlevel_info, max_words=5):
    """Group word-level timings into short subtitle segments.

    A segment is closed as soon as a word ends with a punctuation mark or the
    segment holds ``max_words`` words. Afterwards every segment's end time is
    moved to the start of the next segment so no gap is left between
    consecutive subtitles.

    Args:
        wordlevel_info: Iterable of dicts with ``'word'``, ``'start'`` and
            ``'end'`` keys.
        max_words: Maximum number of words per segment (default 5).

    Returns:
        A list of dicts with keys ``'word'`` (joined text), ``'start'``,
        ``'end'`` and ``'textcontents'`` (the original word entries).
    """
    # The last two entries are the full-width '!' and '?', written as escapes
    # so they survive copy/paste normalisation.
    punctuation_marks = {
        '.', '!', '?', ',', ';', ':', '—', '-', '。', '\uff01', '\uff1f',
    }
    subtitles = []
    line = []
    for word_data in wordlevel_info:
        line.append(word_data)
        # Strip first so trailing whitespace cannot hide the punctuation.
        current_word = word_data['word'].strip()
        ends_with_punct = bool(current_word) and current_word[-1] in punctuation_marks
        if ends_with_punct or len(line) >= max_words:
            subtitles.append(_build_subtitle(line))
            line = []
    # Flush the words left over after the last break.
    if line:
        subtitles.append(_build_subtitle(line))
    # Remove gaps: each segment stays on screen until the next one starts.
    for previous, current in zip(subtitles, subtitles[1:]):
        previous["end"] = current["start"]
    return subtitles
def format_time(seconds):
    """Format a time offset in seconds as an SRT timestamp ``HH:MM:SS,mmm``.

    The value is rounded to whole milliseconds *before* it is split into
    fields, so a value such as 0.9996 carries into the seconds field instead
    of producing a four-digit millisecond part.

    Args:
        seconds: Offset in seconds (int or float). Negative values are
            clamped to zero.

    Returns:
        The zero-padded timestamp string.
    """
    total_ms = max(0, int(round(seconds * 1000)))
    total_seconds, milliseconds = divmod(total_ms, 1000)
    total_minutes, secs = divmod(total_seconds, 60)
    hours, minutes = divmod(total_minutes, 60)
    return f"{hours:02d}:{minutes:02d}:{secs:02d},{milliseconds:03d}"
def generate_subtitle_file(language, segments, input_video_name):
    """Write subtitle segments to an SRT file and return its name.

    Args:
        language: Language tag embedded in the file name.
        segments: Iterable of dicts with ``'start'``, ``'end'`` (seconds) and
            ``'word'`` (subtitle text) keys.
        input_video_name: Base name embedded in the file name.

    Returns:
        The written file name, ``sub-<input_video_name>.<language>.srt``.
    """
    subtitle_file = f"sub-{input_video_name}.{language}.srt"
    blocks = []
    for number, segment in enumerate(segments, start=1):
        segment_start = format_time(segment['start'])
        segment_end = format_time(segment['end'])
        # The space before each newline is part of the existing output
        # format and is kept so the written file stays byte-identical.
        blocks.append(
            f"{number} \n"
            f"{segment_start} --> {segment_end} \n"
            f"{segment['word']} \n"
            "\n"
        )
    # ``with`` guarantees the handle is closed even if the write fails.
    with open(subtitle_file, "w", encoding='utf8') as f:
        f.write("".join(blocks))
    return subtitle_file
def clean_text(text):
    """Strip Markdown code-fence residue from a subtitle string.

    Removes a triple-backtick fence at the very start and at the end of the
    text, then removes the literal ``srt`` wherever it begins a line.

    Args:
        text: The raw string to clean.

    Returns:
        The cleaned string.
    """
    fence = re.compile(r"^```|```$")
    language_tag = re.compile(r'^srt', flags=re.MULTILINE)
    unfenced = fence.sub('', text)
    return language_tag.sub('', unfenced)
def translate_text(api_key, text, source_language = "en", target_language = "fa"):
    """Translate text through the one-api.ir Google-translate endpoint.

    Args:
        api_key: Value sent in the ``one-api-token`` header.
        text: Text to translate.
        source_language: Source language code (default ``"en"``).
        target_language: Target language code (default ``"fa"``).

    Returns:
        The ``'result'`` field of the JSON response on HTTP 200, otherwise
        ``None`` (the error is printed).
    """
    url = "https://api.one-api.ir/translate/v1/google/"
    # The body previously referenced undefined names (source_lang /
    # target_lang), so every call raised NameError.
    request_body = {"source": source_language, "target": target_language, "text": text}
    headers = {"one-api-token": api_key, "Content-Type": "application/json"}
    response = requests.post(url, headers=headers, json=request_body)
    if response.status_code != 200:
        print(f"Error: {response.status_code}, {response.text}")
        return None
    result = response.json()
    return result['result']
def enhance_text(api_key, text):
    """Translate an English SRT string to Persian via the one-api.ir GPT-4o chat endpoint.

    The request is tried up to three times, pausing 30 seconds between
    attempts. When every attempt fails the whole ``text`` is handed to
    ``translate_text`` as a fallback.

    Args:
        api_key: Value sent in the ``one-api-token`` header.
        text: SRT content to translate.

    Returns:
        The cleaned first entry of the response's ``result`` list on success,
        otherwise whatever ``translate_text`` returns (which may be ``None``).
    """
    url = "https://api.one-api.ir/chatbot/v1/gpt4o/"
    # Conversation: instruction, a canned acknowledgement, then the payload.
    request_body = [
        {
            "role": "user",
            "content": "Please take the following SRT subtitle text in English and translate only the subtitle text into Persian. Ensure that all numbering and time codes remain unchanged. The output should be a new SRT file with the subtitles in Persian, preserving the original formatting and timings and exept for the subtitle dont return anything in response. the subtitle will be provided in the following message"
        },
        {
            "role": "assistant",
            "content": "okay"
        },
        {
            "role": "user",
            "content": text
        },
    ]
    headers = {
        "one-api-token": api_key,
        "Content-Type": "application/json"
    }
    max_attempts = 3
    for attempt in range(1, max_attempts + 1):
        response = requests.post(url, headers=headers, json=request_body)
        if response.status_code == 200:
            result = response.json()
            if result["status"] == 200:
                print("status: ", result["status"])
                te = clean_text(result["result"][0])
                print("result: ", te)
                return te
            print(f"Error: status {result['status']}, retrying in 30 seconds...")
        else:
            print(f"Error: {response.status_code}, {response.text}, retrying in 30 seconds...")
        # Only wait when another attempt follows; sleeping after the final
        # failure just delays the fallback by 30 seconds.
        if attempt < max_attempts:
            time.sleep(30)
    print("Error Max attempts reached. Could not retrieve a successful response.")
    return translate_text(api_key, text)
def read_srt_file(file_path):
    """Return the UTF-8 text content of a file, or ``None`` on failure.

    Any error is reported with ``print`` instead of being raised; a missing
    file gets its own message.

    Args:
        file_path: Path of the file to read.

    Returns:
        The file content as a string, or ``None`` if it could not be read.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as handle:
            return handle.read()
    except FileNotFoundError:
        print(f"The file {file_path} was not found.")
    except Exception as e:
        print(f"An error occurred: {e}")
    return None
def write_srt(subtitle_text, output_file="edited_srt.srt"):
    """Write *subtitle_text* to *output_file* as UTF-8, replacing any existing content.

    Args:
        subtitle_text: The string to write.
        output_file: Destination path (default ``"edited_srt.srt"``).
    """
    with open(output_file, mode='w', encoding="utf-8") as srt_out:
        srt_out.write(subtitle_text)
def write_google(google_translate):
    """Save translated subtitle text to ``google_translate.srt``.

    The file is created in the current working directory, encoded as UTF-8,
    and overwritten if it already exists.

    Args:
        google_translate: The string to write.

    Returns:
        The file name ``"google_translate.srt"``.
    """
    target = "google_translate.srt"
    with open(target, 'w', encoding="utf-8") as out:
        out.write(google_translate)
    return target
def generate_translated_subtitle(language, segments, input_video_name):
    """Re-emit translated SRT text with a right-to-left mark on each text line.

    Blank lines are dropped and the remaining lines are read in groups of
    three (index, time code, text), so each cue is expected to carry exactly
    one text line. The text line is prefixed with U+200F and followed by an
    empty line.

    Args:
        language: Accepted for compatibility; the body does not read it.
        segments: The SRT content as a single string.
        input_video_name: Name or ``/``-separated path; only the last path
            component is used to build the output file name.

    Returns:
        The written file name, ``<last path component>.srt``.
    """
    input_video_name = input_video_name.split('/')[-1]
    subtitle_file = f"{input_video_name}.srt"
    # Drop whitespace-only lines as well as empty ones (e.g. " " or a lone
    # "\r"); otherwise a stray one shifts every following three-line group.
    content_lines = [line for line in segments.split('\n') if line.strip()]
    parts = []
    for position, line in enumerate(content_lines):
        if position % 3 == 2:
            # Third line of a group: the subtitle text.
            parts.append(f"\u200F{line}\n\n")
        else:
            # First and second lines: index and time code, copied as-is.
            parts.append(f"{line}\n")
    with open(subtitle_file, "w", encoding='utf8') as f:
        f.write("".join(parts))
    return subtitle_file
def time_to_seconds(time_obj):
    """Convert an object with hours/minutes/seconds/milliseconds fields to seconds.

    Args:
        time_obj: Object exposing ``hours``, ``minutes``, ``seconds`` and
            ``milliseconds`` attributes.

    Returns:
        The total number of seconds, including the fractional millisecond part.
    """
    whole_seconds = time_obj.hours * 3600 + time_obj.minutes * 60 + time_obj.seconds
    fraction = time_obj.milliseconds / 1000
    return whole_seconds + fraction
def create_subtitle_clips(subtitles, videosize, fontsize, font, color, debug):
    """Build one text clip and one translucent backdrop clip per subtitle cue.

    Both clips share a box that is 80% of the video width and 20% of its
    height, horizontally centred with its top edge at 68% of the video height.

    Args:
        subtitles: Iterable of cues exposing ``start``, ``end`` and ``text``.
        videosize: ``(width, height)`` of the target video.
        fontsize: Font size passed to ``TextClip``.
        font: Font passed to ``TextClip``.
        color: Text colour passed to ``TextClip``.
        debug: Accepted for compatibility; the body does not read it.

    Returns:
        A ``(subtitle_clips, color_clips)`` tuple of two lists, in cue order.
        Cues whose duration is not positive are skipped.
    """
    # Geometry depends only on the video size, so compute it once.
    video_width, video_height = videosize
    box_size = (int(video_width * 0.8), int(video_height * 0.2))
    text_position = ('center', video_height * 0.68)

    subtitle_clips = []
    color_clips = []
    for subtitle in subtitles:
        start_time = time_to_seconds(subtitle.start)
        duration = time_to_seconds(subtitle.end) - start_time
        if duration <= 0:
            # Nothing to show for an empty or inverted time range.
            continue
        # NOTE: the text is passed to TextClip as-is; the file imports
        # arabic_reshaper/bidi but does not apply them here.
        text_clip = TextClip(
            font,
            subtitle.text,
            font_size=fontsize,
            size=box_size,
            text_align="right",
            color=color,
            method='caption',
        ).with_start(start_time).with_duration(duration)
        backdrop = ColorClip(
            size=box_size,
            color=(225, 0, 0),
        ).with_opacity(0.2).with_start(start_time).with_duration(duration)
        subtitle_clips.append(text_clip.with_position(text_position))
        color_clips.append(backdrop.with_position(text_position))
    return subtitle_clips, color_clips
def video_edit(srt, input_video, input_audio= "audio.mp3"):
    """Burn subtitles into a video and write ``video_subtitled.mp4``.

    Args:
        srt: Path of the SRT file to render. A falsy value falls back to
            ``"video_subtitled.srt"``, the name that used to be hard-coded.
        input_video: Path of the source video.
        input_audio: Path of the audio file attached to the output
            (default ``"audio.mp3"``).

    Returns:
        The output file name, ``"video_subtitled.mp4"``.
    """
    input_video_name = "video"
    output_video_file = input_video_name + '_subtitled' + ".mp4"
    # Honour the caller-supplied subtitle path instead of ignoring it.
    srt_path = srt or "video_subtitled.srt"

    source_clip = VideoFileClip(input_video)
    try:
        audio = AudioFileClip(input_audio)
        try:
            video = source_clip.with_audio(audio)
            print(video)
            subtitles = pysrt.open(srt_path, encoding="utf-8")
            subtitle_clips, color_clips = create_subtitle_clips(subtitles, video.size, 24, 'arial.ttf', 'white', False)
            # Layer order: video, then backdrops, then text on top.
            final_video = CompositeVideoClip([video] + color_clips + subtitle_clips)
            final_video.write_videofile(output_video_file, codec="libx264", audio_codec="aac", logger=None)
        finally:
            audio.close()
    finally:
        # Release the readers even when rendering fails.
        source_clip.close()
    print('final')
    return output_video_file
def process_video(video, clip_type):
    """Run the full pipeline: audio extraction, transcription, translation, rendering.

    Args:
        video: Path of the uploaded video file.
        clip_type: Value of the "Clip Type" dropdown; the body does not read it.

    Returns:
        The name of the subtitled output video.

    Raises:
        gr.Error: If no video was supplied or the translation step returned
            nothing.
    """
    if not video:
        raise gr.Error("Please upload a video file first.")
    mp3_file = extract_audio(video)
    wordlevel_info = transcribe(mp3_file)
    subtitles = create_subtitles(wordlevel_info)
    subtitle_file = generate_subtitle_file('fa', subtitles, 'video_subtitled')
    srt_string = read_srt_file(subtitle_file)
    google_translate = enhance_text(api_key, srt_string)
    if not google_translate:
        # Both translation paths can return None; stop here with a readable
        # message instead of failing later inside a file write.
        raise gr.Error("Translation failed: no subtitles were returned.")
    # Keep a copy of the raw translated text on disk (google_translate.srt).
    write_google(google_translate)
    sub = generate_translated_subtitle("fa", google_translate, "video_subtitled")
    return video_edit(sub, video, input_audio=mp3_file)
# Gradio UI: upload a video, pick a clip type, and run the pipeline.
with gr.Blocks() as demo:
    gr.Markdown("Start typing below and then click **Run** to see the output.")
    with gr.Column():
        video_file_input = gr.Video(label="Upload Video File")
        clip_type = gr.Dropdown(["auto edit", "default"], label="Clip Type")
        btn = gr.Button("create")
        video_file_output = gr.Video(label="result: ")
    btn.click(fn=process_video, inputs=[video_file_input, clip_type], outputs=video_file_output)
    # Disabled experimental flow. It was kept as a bare string literal, so it
    # never ran; `out`, `video_path_output` and `audio_path_output` are not
    # defined in this file.
    #     with gr.Row():
    #         vid_out = gr.Video()
    #         srt_file = gr.File()
    #         btn2 = gr.Button("transcribe")
    #     gr.on(
    #         triggers=[btn2.click],
    #         fn=write_google,
    #         inputs=out,
    #     ).then(video_edit, [out, video_path_output, audio_path_output], outputs=[vid_out, srt_file])

demo.launch(debug=True)