bale

Sleeping

App Files Files Community

SPACERUNNER99 committed 24 days ago

Commit

16b8b2c

verified ·

1 Parent(s): 65639a4

Update app.py

Browse files

Files changed (1) hide show

app.py +171 -327

app.py CHANGED Viewed

@@ -20,237 +20,6 @@ from PIL import Image
 api_key = "268976:66f4f58a2a905"
-#download from bale
-from balethon import Client
-from balethon.conditions import document
-bot = Client("1261816176:T4jSrvlJiCfdV5UzUkpywN2HFrzef1IZJs5URAkz")
@bot.on_message(document)
async def download_document(client, message):
    """Save a document sent to the bot into the current directory.

    Replies "Downloading...", fetches the document bytes with
    ``client.download``, writes them to ``downloaded file.<ext>`` and then
    edits the reply to "Download completed".

    Args:
        client: The bot client that received the message.
        message: The incoming message; its ``document`` attribute supplies
            the id and MIME type.
    """
    status_message = await message.reply("Downloading...")
    payload = await client.download(message.document.id)
    # The extension is the MIME subtype with any ";parameter" suffix removed.
    subtype = message.document.mime_type.split("/")[-1]
    extension = subtype.split(";")[0]
    with open(f"downloaded file.{extension}", "wb") as target:
        target.write(payload)
    await status_message.edit_text("Download completed")
-bot.run()
def fetch_data(url):
    """Fetch *url* with an HTTP GET and return the decoded JSON body.

    Args:
        url: Fully-formed request URL.

    Returns:
        The parsed JSON value, or ``None`` (after printing the error) when
        the request fails, the server answers with an error status, or the
        body is not valid JSON.
    """
    try:
        # A timeout keeps a stalled server from hanging the caller forever.
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        return response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers JSON decode failures on requests versions where
        # the decode error is not a RequestException subclass.
        print(f"An error occurred: {e}")
        return None
def download_file(url):
    """Download a file described by a ``<link>#<filename>`` string.

    The part before the first ``#`` is requested; the part between the first
    and second ``#`` is the local file name that receives the body.

    Args:
        url: Combined ``<link>#<filename>`` string.

    Returns:
        None. Progress and errors are printed.
    """
    parts = url.split("#")
    if len(parts) < 2 or not parts[1]:
        # Without a file name there is nowhere to write the download.
        print(f"An error occurred: missing '#<filename>' suffix in {url!r}")
        return
    link, filename = parts[0], parts[1]
    try:
        # The context manager releases the streamed connection on every path.
        with requests.get(link, stream=True, timeout=30) as response:
            response.raise_for_status()
            print(filename)
            with open(filename, 'wb') as file:
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:
                        file.write(chunk)
        print(f"Downloaded successfully: {filename}")
    except requests.exceptions.RequestException as e:
        print(f"An error occurred: {e}")
def download_chunk(url, start, end, filename, index):
    """Download one byte range of *url* into ``<filename>.part<index>``.

    Args:
        url: Resource to download.
        start: First byte offset of the range (inclusive).
        end: Last byte offset of the range (inclusive).
        filename: Base name of the final file.
        index: Zero-based part number used in the part file name.

    Returns:
        The name of the part file that was written.

    Raises:
        requests.exceptions.RequestException: On connection errors, timeouts
            or an HTTP error status.
    """
    headers = {'Range': f'bytes={start}-{end}'}
    chunk_filename = f'{filename}.part{index}'
    # Timeout avoids a worker thread hanging forever; the context manager
    # releases the streamed connection even if writing fails.
    with requests.get(url, headers=headers, stream=True, timeout=30) as response:
        response.raise_for_status()
        with open(chunk_filename, 'wb') as file:
            for chunk in response.iter_content(chunk_size=8192):
                if chunk:
                    file.write(chunk)
    return chunk_filename
def merge_files(filename, num_parts):
    """Concatenate ``<filename>.part0`` … ``.part<num_parts-1>`` into *filename*.

    Parts are streamed in bounded buffers rather than read whole, so large
    downloads do not have to fit in memory. Part files are left on disk.

    Args:
        filename: Path of the merged output file (overwritten if present).
        num_parts: Number of consecutive part files to append, starting at 0.

    Raises:
        OSError: If a part file is missing or the output cannot be written.
    """
    import shutil

    with open(filename, 'wb') as output_file:
        for i in range(num_parts):
            part_filename = f'{filename}.part{i}'
            with open(part_filename, 'rb') as part_file:
                shutil.copyfileobj(part_file, output_file)
def download_file_in_parallel(link, size, num_threads=4):
    """Download a ``<url>#<filename>`` link as parallel byte-range requests.

    The file is split into contiguous ranges, each fetched by
    ``download_chunk`` on its own thread, then joined with ``merge_files``.

    Args:
        link: Combined ``<url>#<filename>`` string.
        size: Total size of the remote file in bytes.
        num_threads: Maximum number of concurrent range requests.

    Raises:
        ValueError: If *size* is missing or not positive.
        Exception: Whatever a worker raised while downloading its range.
    """
    parts = link.split("#")
    url, filename = parts[0], parts[1]
    print(url+" filename: "+filename)
    if not size or size <= 0:
        raise ValueError(f"size must be a positive byte count, got {size!r}")
    # Never use more workers than bytes; otherwise the chunk size would be 0
    # and the computed ranges would be invalid (e.g. 0 to -1).
    workers = max(1, min(num_threads, size))
    chunk_size = size // workers
    ranges = [(i * chunk_size, (i + 1) * chunk_size - 1) for i in range(workers)]
    ranges[-1] = (ranges[-1][0], size - 1)  # Last range absorbs the remainder
    with concurrent.futures.ThreadPoolExecutor(max_workers=workers) as executor:
        futures = [
            executor.submit(download_chunk, url, start, end, filename, i)
            for i, (start, end) in enumerate(ranges)
        ]
        for future in concurrent.futures.as_completed(futures):
            future.result()  # Re-raise any worker failure here
    merge_files(filename, workers)
    print(f'Downloaded successfully: {filename}')
def one_youtube(link, api_key):
    """Download the 360p video and "medium" audio of a YouTube link via one-api.ir.

    Resolves the video id, reads the format list, asks for download links for
    the selected formats and fetches both files with
    ``download_file_in_parallel``.

    Args:
        link: YouTube URL.
        api_key: one-api.ir token.

    Returns:
        ``(video_name, audio_name)`` — ``<title>.mp4`` and ``<title>.mp3`` —
        or ``(None, None)`` when any API call fails or a required format is
        not offered.
    """
    # Fetch video ID
    video_id_url = f"https://one-api.ir/youtube/?token={api_key}&action=getvideoid&link={link}"
    video_data = fetch_data(video_id_url)
    if not video_data:
        return None, None
    video_id = video_data["result"]

    # Fetch video data
    filter_option = ""  # Replace with your filter option
    video_data_url = f"https://youtube.one-api.ir/?token={api_key}&action=fullvideo&id={video_id}&filter={filter_option}"
    video_data_2 = fetch_data(video_data_url)
    if not video_data_2:
        return None, None
    formats_list = video_data_2["result"]["formats"]
    file_name = video_data_2["result"]["title"]
    video_name = f'{file_name}.mp4'
    audio_name = f'{file_name}.mp3'

    # Start from None so a missing format reaches the guard below instead of
    # raising UnboundLocalError. As before, the last matching entry wins.
    download_id = audio_id = None
    video_size = audio_size = None
    for fmt in formats_list:
        note = fmt.get("format_note")
        if note == "360p":
            download_id, video_size = fmt["id"], fmt["filesize"]
        elif note == "medium":
            audio_id, audio_size = fmt["id"], fmt["filesize"]
    if not download_id or not audio_id:
        return None, None

    # Fetch video and audio links
    video_link_url = f"https://youtube.one-api.ir/?token={api_key}&action=download&id={download_id}"
    audio_link_url = f"https://youtube.one-api.ir/?token={api_key}&action=download&id={audio_id}"
    video_link_data = fetch_data(video_link_url)
    audio_link_data = fetch_data(audio_link_url)
    if not video_link_data or not audio_link_data:
        return None, None
    video_link = video_link_data["result"]["link"]
    audio_link = audio_link_data["result"]["link"]
    vid_str = video_link + "#" + video_name
    audio_str = audio_link + "#" + audio_name

    # Download video and audio files
    print(video_size , audio_size)
    download_file_in_parallel(vid_str, video_size)
    download_file_in_parallel(audio_str, audio_size)
    return video_name, audio_name
-# Define your functions here
def yt_download(url):
    """Download the highest-resolution stream of a YouTube video.

    Args:
        url: YouTube video URL.

    Returns:
        ``(video_path, title)`` where ``video_path`` is ``"<title>.mp4"``.
        NOTE(review): the path is derived from the title rather than taken
        from the download call — confirm it matches the file actually written.
    """
    video = YouTube(url)
    print(video.title)
    expected_path = f"{video.title}.mp4"
    best_stream = video.streams.get_highest_resolution()
    print(best_stream)
    best_stream.download()
    return expected_path, video.title
def download_image(url, save_path='downloaded_image.jpg'):
    """Download an image and save it to *save_path*.

    Args:
        url: Image URL.
        save_path: Destination file; the image is re-encoded by PIL on save.

    Returns:
        *save_path*.

    Raises:
        requests.exceptions.RequestException: On connection errors, timeouts
            or an HTTP error status (instead of passing an error page to PIL).
    """
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    image = Image.open(BytesIO(response.content))
    image.save(save_path)
    return save_path
def insta_oneapi(url, api_key):
    """Download an Instagram post's video and cover image via one-api.ir.

    Args:
        url: Post URL. The shortcode is taken as the second-to-last
            ``/``-separated component, so the URL is expected to end with
            ``/`` — TODO confirm against callers.
        api_key: one-api.ir token sent in the ``one-api-token`` header.

    Returns:
        ``("video.mp4", cover_image_path)`` on success, ``(None, None)`` on
        any failure, so callers can always unpack two values.
    """
    shortcode = url.split("/")[-2]
    print(shortcode)
    url_one = "https://api.one-api.ir/instagram/v1/post/?shortcode=" + shortcode
    headers = {"one-api-token": api_key, "Content-Type": "application/json"}
    response = requests.get(url_one, headers=headers)
    print(response)
    if response.status_code != 200:
        print(f"Error: {response.status_code}, {response.text}")
        return None, None
    try:
        result = response.json()
        # NOTE(review): fixed 10 s pause kept from the original; its purpose
        # is not visible here (possibly waiting for the media URL) — confirm.
        time.sleep(10)
        media = result["result"]['media'][0]
        response = requests.get(media["url"], stream=True)
        response.raise_for_status()
        with open("video.mp4", 'wb') as file:
            for chunk in response.iter_content(chunk_size=8192):
                if chunk:
                    file.write(chunk)
        print(f"Downloaded successfully")
        image_file_path = download_image(media["cover"])
        return "video.mp4", image_file_path
    except (requests.exceptions.RequestException, KeyError, IndexError, ValueError) as e:
        # KeyError/IndexError/ValueError cover an unexpected or non-JSON body.
        print(f"An error occurred: {e}")
        return None, None
def insta_download(permalink):
    """Download the video of an Instagram reel or post with Instaloader.

    Args:
        permalink: URL containing ``instagram.com/reel/<code>`` or
            ``instagram.com/p/<code>``.

    Returns:
        The local file name (last path component of the video URL without
        its query string), or ``None`` after printing the error when
        anything fails.
    """
    loader = instaloader.Instaloader()
    try:
        # The reel marker is checked before the generic post marker.
        for marker in ("instagram.com/reel/", "instagram.com/p/"):
            if marker in permalink:
                shortcode = permalink.split(marker)[-1].split("/")[0]
                break
        else:
            raise ValueError("Invalid permalink format")

        post = instaloader.Post.from_shortcode(loader.context, shortcode)
        if not post.is_video:
            raise ValueError("The provided permalink is not a video.")

        video_url = post.video_url
        # Last path component, with any query string dropped.
        filename = video_url.split("/")[-1].split("?")[0]

        response = requests.get(video_url, stream=True)
        response.raise_for_status()  # Raise an error for bad responses
        with open(filename, 'wb') as target:
            for chunk in response.iter_content(chunk_size=8192):
                target.write(chunk)
        print(f"Downloaded video {filename} successfully.")
        return filename
    except Exception as e:
        print(f"Failed to download video from {permalink}: {e}")
        return None
 def extract_audio(input_video_name):
     # Define the input video file and output audio file
     mp3_file = "audio.mp3"
@@ -271,13 +40,59 @@ def extract_audio(input_video_name):
     print("Audio extraction successful!")
     return mp3_file
def transcribe(audio):
    """Transcribe *audio* with the "tiny" Whisper model.

    Prints each segment as ``[start -> end] text`` and returns the segments
    as a list.

    Args:
        audio: Audio input accepted by ``WhisperModel.transcribe``.

    Returns:
        List of segment objects exposing ``start``, ``end`` and ``text``.
    """
    whisper = WhisperModel("tiny")
    segment_stream, _info = whisper.transcribe(audio)
    segments = list(segment_stream)
    for seg in segments:
        print("[%.2fs -> %.2fs] %s" % (seg.start, seg.end, seg.text))
    return segments
 def format_time(seconds):
     hours = math.floor(seconds / 3600)
@@ -293,27 +108,17 @@ def generate_subtitle_file(language, segments, input_video_name):
     subtitle_file = f"sub-{input_video_name}.{language}.srt"
     text = ""
     for index, segment in enumerate(segments):
-        segment_start = format_time(segment.start)
-        segment_end = format_time(segment.end)
         text += f"{str(index+1)} \n"
         text += f"{segment_start} --> {segment_end} \n"
-        text += f"{segment.text} \n"
         text += "\n"
     f = open(subtitle_file, "w", encoding='utf8')
     f.write(text)
     f.close()
     return subtitle_file
def read_srt_file(file_path):
    """Return the UTF-8 text of the file at *file_path*.

    Args:
        file_path: Path of the subtitle file to read.

    Returns:
        The file content as a string, or ``None`` after printing a message
        when the file is missing or cannot be read.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as srt:
            return srt.read()
    except FileNotFoundError:
        print(f"The file {file_path} was not found.")
    except Exception as error:
        print(f"An error occurred: {error}")
    return None
 def clean_text(text):
     # Remove 'srt ' from the start of each line
     # Remove ''' from the start and end
@@ -321,13 +126,25 @@ def clean_text(text):
     text = re.sub(r'^srt', '', text, flags=re.MULTILINE)
     return text
-def enhance_text(api_key, text, google):
     url = "https://api.one-api.ir/chatbot/v1/gpt4o/"
     # Prepare the request body
     request_body = [{
         "role": "user",
-        "content": f"{text} Translate the above text into Persian, converting the English terms used in it into common Persian terms. in respose dont add any thing exept for the srt formated translation."
     },]
     # Add the API key to the request
@@ -337,114 +154,141 @@ def enhance_text(api_key, text, google):
     }
     # Make the POST request
-    response = requests.post(url, headers=headers, json=request_body)
-    # Check the response status
-    if response.status_code == 200:
-        result = response.json()
-        clean_text(result["result"][0])
-        last = clean_text(result["result"][0])
-        print("result: ")
-        print(last)
-        return last
-    else:
-        print(f"Error: {response.status_code}, {response.text}")
-        return None
def translate_text(api_key, source_lang, target_lang, text):
    """Translate *text* through one-api.ir, then refine it with ``enhance_text``.

    Args:
        api_key: one-api.ir token.
        source_lang: Source language code sent as ``source``.
        target_lang: Target language code sent as ``target``.
        text: Text to translate.

    Returns:
        The value returned by ``enhance_text``, or ``None`` (after printing
        the error) when the translate endpoint does not answer with 200.
    """
    response = requests.post(
        "https://api.one-api.ir/translate/v1/google/",
        headers={"one-api-token": api_key, "Content-Type": "application/json"},
        json={"source": source_lang, "target": target_lang, "text": text},
    )
    if response.status_code != 200:
        print(f"Error: {response.status_code}, {response.text}")
        return None
    google_result = response.json()['result']
    return enhance_text(api_key, text, google_result)
 def write_google(google_translate):
     google = "google_translate.srt"
     with open(google, 'w', encoding="utf-8") as f:
         f.write(google_translate)
 def time_to_seconds(time_obj):
     return time_obj.hours * 3600 + time_obj.minutes * 60 + time_obj.seconds + time_obj.milliseconds / 1000
 def create_subtitle_clips(subtitles, videosize, fontsize, font, color, debug):
     subtitle_clips = []
     for subtitle in subtitles:
         start_time = time_to_seconds(subtitle.start) # Add 2 seconds offset
         end_time = time_to_seconds(subtitle.end)
         duration = end_time - start_time
         video_width, video_height = videosize
-        max_width = video_width * 0.8
         max_height = video_height * 0.2
         #reshaped_text = arabic_reshaper.reshape(subtitle.text)
         #bidi_text = get_display(reshaped_text)
-        text_clip = TextClip(font, subtitle.text, font_size=fontsize, size=(int(video_width * 0.8), int(video_height * 0.2)) ,text_align="center" ,color=color, method='caption').with_start(start_time).with_duration(duration)
         subtitle_x_position = 'center'
         subtitle_y_position = video_height * 0.68
         text_position = (subtitle_x_position, subtitle_y_position)
         subtitle_clips.append(text_clip.with_position(text_position))
-    return subtitle_clips
def process_video(url, type):
    """Download a clip, transcribe it, translate the subtitles and burn them in.

    Args:
        url: Instagram or YouTube link.
        type: ``"insta"`` or ``"youtube"``. (The name shadows the builtin but
            is kept because it is part of the call signature.)

    Returns:
        Path of the subtitled output video.

    Raises:
        ValueError: If *type* is not one of the supported values.
        RuntimeError: If the clip could not be downloaded.
    """
    if type == "insta":
        downloaded = insta_oneapi(url, api_key)
        # The downloader signals failure with None; fail with a clear message
        # instead of a TypeError on unpacking.
        if not downloaded or not downloaded[0]:
            raise RuntimeError(f"Could not download Instagram clip: {url}")
        input_video, image_path = downloaded
        input_video_name = input_video.replace(".mp4", "")
        video = VideoFileClip(input_video)
        # Show the cover image for the first second, then the video itself.
        image_clip = ImageClip(image_path).with_duration(1)
        image_clip = image_clip.with_position(("center", "center")).resized(height=video.size[1])
        first_video = CompositeVideoClip([video.with_start(1), image_clip])
        input_video = input_video_name + "_cover.mp4"
        input_video_name = input_video.replace(".mp4", "")
        first_video.write_videofile(input_video, codec="libx264", audio_codec="aac", logger=None)
        input_audio = extract_audio(input_video)
    elif type == "youtube":
        input_video, input_audio = one_youtube(url, api_key)
        if not input_video or not input_audio:
            raise RuntimeError(f"Could not download YouTube clip: {url}")
        input_video_name = input_video.replace(".mp4", "")
    else:
        # Previously fell through and crashed later on unbound locals.
        raise ValueError(f"Unsupported clip type: {type!r} (expected 'insta' or 'youtube')")

    print("Current Time =", time.strftime("%H:%M:%S", time.localtime()))
    segments = transcribe(audio=input_audio)
    language = "fa"
    subtitle_file = generate_subtitle_file(language=language, segments=segments, input_video_name=input_video_name)
    source_language = "en"
    target_language = "fa"
    srt_string = read_srt_file(subtitle_file)
    google_translate = translate_text(api_key, source_language, target_language, srt_string)
    write_google(google_translate)

    video = VideoFileClip(input_video)
    audio = AudioFileClip(input_audio)
    video = video.with_audio(audio)
    print(video)
    subtitles = pysrt.open("google_translate.srt", encoding="utf-8")
    output_video_file = input_video_name + '_subtitled' + ".mp4"
    subtitle_clips = create_subtitle_clips(subtitles, video.size, 32, 'arial.ttf', 'white', False)
    final_video = CompositeVideoClip([video] + subtitle_clips)
    final_video.write_videofile(output_video_file, codec="libx264", audio_codec="aac", logger=None)
    print('final')
    print("Current Time =", time.strftime("%H:%M:%S", time.localtime()))
    return output_video_file
def download_file(file_path):
    """Return a Gradio file-component update for *file_path*.

    NOTE(review): this reuses the name of the URL downloader defined earlier
    in the file and therefore replaces it at import time — confirm intended.
    """
    file_update = gr.File.update(file_path)
    return file_update
# Minimal Gradio form: a text box for the link, a dropdown for the source
# type, and the processed video offered as a file download.
iface = gr.Interface(
    fn=process_video,
    inputs=["text", gr.Dropdown(["insta", "youtube"])],
    outputs="file",
)
iface.launch(debug=True)

 api_key = "268976:66f4f58a2a905"
 def extract_audio(input_video_name):
     # Define the input video file and output audio file
     mp3_file = "audio.mp3"
     print("Audio extraction successful!")
     return mp3_file
def transcribe(audio, max_segment_duration=2.0):  # Set your desired max duration here
    """Transcribe *audio* on CPU with the "tiny" Whisper model, word by word.

    Runs with the VAD filter enabled (minimum silence 1500 ms) and word
    timestamps, printing each word as ``[start -> end] word``.

    Args:
        audio: Audio input accepted by ``WhisperModel.transcribe``.
        max_segment_duration: Accepted but not used by this function.

    Returns:
        List of ``{'word', 'start', 'end'}`` dicts in spoken order.
    """
    whisper = WhisperModel("tiny", device="cpu")
    vad_options = dict(min_silence_duration_ms=1500)
    segment_stream, _info = whisper.transcribe(audio, vad_filter=True, vad_parameters=vad_options, word_timestamps=True)
    wordlevel_info = []
    # Materialise all segments first, as the original did; its comment notes
    # that the transcription actually runs at this point.
    for segment in list(segment_stream):
        for word in segment.words:
            print("[%.2fs -> %.2fs] %s" % (word.start, word.end, word.word))
            wordlevel_info.append({'word':word.word,'start':word.start,'end':word.end})
    return wordlevel_info
def _build_subtitle(line):
    """Collapse a non-empty run of word entries into one subtitle dict."""
    # Strip each word before joining so entries that carry their own
    # leading/trailing space do not produce doubled spaces.
    stripped = (item["word"].strip() for item in line)
    return {
        "word": " ".join(word for word in stripped if word),
        "start": line[0]["start"],
        "end": line[-1]["end"],
        "textcontents": line.copy(),
    }


def create_subtitles(wordlevel_info, max_words=5):
    """Group word-level timings into short subtitle lines.

    A line is closed when a word ends with a punctuation mark or when it
    holds *max_words* words. Afterwards each line's end time is moved to the
    start of the next line so consecutive subtitles have no gap.

    Args:
        wordlevel_info: Sequence of ``{'word', 'start', 'end'}`` dicts.
        max_words: Maximum number of words per subtitle line.

    Returns:
        List of dicts with ``word`` (joined text), ``start``, ``end`` and
        ``textcontents`` (the word dicts that make up the line). Empty input
        gives an empty list.
    """
    punctuation_marks = {'.', '!', '?', ',', ';', ':', '—', '-', '。', '！', '？'}  # Add/remove punctuation as needed
    subtitles = []
    line = []
    for word_data in wordlevel_info:
        line.append(word_data)
        # Judge punctuation on the stripped word so trailing whitespace does
        # not hide a sentence end.
        text = word_data['word'].strip()
        ends_with_punct = bool(text) and text[-1] in punctuation_marks
        if ends_with_punct or len(line) >= max_words:
            subtitles.append(_build_subtitle(line))
            line = []
    # Add remaining words if any
    if line:
        subtitles.append(_build_subtitle(line))
    # Remove gaps by extending each subtitle to the start of the next one.
    for previous, current in zip(subtitles, subtitles[1:]):
        previous["end"] = current["start"]
    return subtitles
 def format_time(seconds):
     hours = math.floor(seconds / 3600)
     subtitle_file = f"sub-{input_video_name}.{language}.srt"
     text = ""
     for index, segment in enumerate(segments):
+        segment_start = format_time(segment['start'])
+        segment_end = format_time(segment['end'])
         text += f"{str(index+1)} \n"
         text += f"{segment_start} --> {segment_end} \n"
+        text += f"{segment['word']} \n"
         text += "\n"
     f = open(subtitle_file, "w", encoding='utf8')
     f.write(text)
     f.close()
     return subtitle_file
 def clean_text(text):
     # Remove 'srt ' from the start of each line
     # Remove ''' from the start and end
     text = re.sub(r'^srt', '', text, flags=re.MULTILINE)
     return text
def translate_text(api_key, text, source_language = "en", target_language = "fa"):
    """Translate *text* with the one-api.ir Google translate endpoint.

    Args:
        api_key: one-api.ir token sent in the ``one-api-token`` header.
        text: Text to translate.
        source_language: Source language code sent as ``source``.
        target_language: Target language code sent as ``target``.

    Returns:
        The ``result`` field of the JSON response, or ``None`` (after
        printing the error) when the endpoint does not answer with 200.
    """
    url = "https://api.one-api.ir/translate/v1/google/"
    # Use the actual parameter names; the previous body referenced undefined
    # source_lang/target_lang and raised NameError on every call.
    request_body = {"source": source_language, "target": target_language, "text": text}
    headers = {"one-api-token": api_key, "Content-Type": "application/json"}
    response = requests.post(url, headers=headers, json=request_body)
    if response.status_code == 200:
        result = response.json()
        return result['result']
    else:
        print(f"Error: {response.status_code}, {response.text}")
        return None
+def enhance_text(api_key, text):
     url = "https://api.one-api.ir/chatbot/v1/gpt4o/"
     # Prepare the request body
     request_body = [{
         "role": "user",
+        "content":  f"i will provide you with an english subtitle of a clip which is in srt format and i need you to translate each line in persian an return in a srt format without changing the original timing, converting the English terms used, into common Persian terms. in respose dont add any thing and keep the srt format, keep in mind the duraetion of the end of the srt should be the same as the duaration of the clip. subtitle: {text} "
     },]
     # Add the API key to the request
     }
     # Make the POST request
+    attempts = 0
+    max_attempts = 3
+    while attempts < max_attempts:
+        response = requests.post(url, headers=headers, json=request_body)
+        if response.status_code == 200:
+            result = response.json()
+            if result["status"] == 200:
+                print("status: ", result["status"])
+                te = clean_text(result["result"][0])
+                print("result: ", te)
+                return te
+            else:
+                print(f"Error: status {result['status']}, retrying in 30 seconds...")
+        else:
+            print(f"Error: {response.status_code}, {response.text}, retrying in 30 seconds...")
+        attempts += 1
+        time.sleep(30)
+    print("Error Max attempts reached. Could not retrieve a successful response.")
+    te = translate_text(api_key, text)
+    return te
def read_srt_file(file_path):
    """Read a subtitle file as UTF-8 text.

    Args:
        file_path: Path of the file to read.

    Returns:
        The file content, or ``None`` after printing a message when the file
        does not exist or reading fails.
    """
    srt_content = None
    try:
        with open(file_path, 'r', encoding='utf-8') as handle:
            srt_content = handle.read()
    except FileNotFoundError:
        print(f"The file {file_path} was not found.")
    except Exception as problem:
        print(f"An error occurred: {problem}")
    return srt_content
def write_srt(subtitle_text, output_file="edited_srt.srt"):
    """Write *subtitle_text* to *output_file* as UTF-8, replacing any existing file.

    Args:
        subtitle_text: Subtitle content to store.
        output_file: Destination path.
    """
    with open(output_file, 'w', encoding="utf-8") as srt_out:
        srt_out.write(subtitle_text)
 def write_google(google_translate):
     google = "google_translate.srt"
     with open(google, 'w', encoding="utf-8") as f:
         f.write(google_translate)
+    return google
def generate_translated_subtitle(language, segments, input_video_name):
    """Rewrite translated SRT text into ``<name>.srt`` with right-to-left marks.

    Each cue is written as its index line, its timing line, then every text
    line prefixed with U+200F (RIGHT-TO-LEFT MARK), followed by a blank line.
    Cues are recognised by their ``-->`` timing line rather than by a fixed
    three-line rhythm, so multi-line cue text, CRLF line endings and stray
    whitespace-only lines no longer shift the layout. Lines before the first
    cue are dropped.

    Args:
        language: Unused.
        segments: SRT-formatted subtitle text.
        input_video_name: Name or ``/``-separated path; only the last
            component is used for the output file name.

    Returns:
        The name of the written file, ``"<last component>.srt"``, relative
        to the working directory.
    """
    base_name = input_video_name.split('/')[-1]
    subtitle_file = f"{base_name}.srt"
    raw_lines = segments.replace("\r\n", "\n").split("\n")
    lines = [line for line in raw_lines if line.strip()]

    # Group lines into cues: [index_line, timing_line, *text_lines].
    cues = []
    position = 0
    while position < len(lines):
        next_is_timing = position + 1 < len(lines) and "-->" in lines[position + 1]
        if next_is_timing:
            cues.append(lines[position:position + 2])
            position += 2
        else:
            if cues:
                cues[-1].append(lines[position])
            position += 1

    parts = []
    for index_line, timing_line, *text_lines in cues:
        parts.append(f"{index_line}\n{timing_line}\n")
        parts.extend(f"\u200F{text_line}\n" for text_line in text_lines)
        parts.append("\n")
    with open(subtitle_file, "w", encoding='utf8') as f:
        f.write("".join(parts))
    return subtitle_file
 def time_to_seconds(time_obj):
     return time_obj.hours * 3600 + time_obj.minutes * 60 + time_obj.seconds + time_obj.milliseconds / 1000
 def create_subtitle_clips(subtitles, videosize, fontsize, font, color, debug):
     subtitle_clips = []
+    color_clips=[]
     for subtitle in subtitles:
         start_time = time_to_seconds(subtitle.start) # Add 2 seconds offset
         end_time = time_to_seconds(subtitle.end)
         duration = end_time - start_time
         video_width, video_height = videosize
+        max_width = video_width * 0.8
         max_height = video_height * 0.2
         #reshaped_text = arabic_reshaper.reshape(subtitle.text)
         #bidi_text = get_display(reshaped_text)
+        text_clip = TextClip(font, subtitle.text, font_size=fontsize, size=(int(video_width * 0.8), int(video_height * 0.2)) ,text_align="right" ,color=color, method='caption').with_start(start_time).with_duration(duration)
+        myclip = ColorClip(size=(int(video_width * 0.8), int(video_height * 0.2)) , color=(225, 0, 0)).with_opacity(0.2).with_start(start_time).with_duration(duration)
         subtitle_x_position = 'center'
         subtitle_y_position = video_height * 0.68
         text_position = (subtitle_x_position, subtitle_y_position)
         subtitle_clips.append(text_clip.with_position(text_position))
+        color_clips.append(myclip.with_position(text_position))
+    return subtitle_clips, color_clips
def video_edit(srt, input_video, input_audio= "audio.mp3"):
    """Burn subtitles into *input_video* and write ``video_subtitled.mp4``.

    Args:
        srt: Path of the SRT file to render. When it does not name an
            existing file, ``video_subtitled.srt`` is used, which is what
            this function always read before.
        input_video: Path of the source video.
        input_audio: Path of the audio track attached to the video.

    Returns:
        The output file name, ``"video_subtitled.mp4"``.
    """
    import os

    input_video_name = "video"
    # Honour the caller's subtitle path instead of ignoring the argument.
    if isinstance(srt, str) and os.path.isfile(srt):
        subtitle_path = srt
    else:
        subtitle_path = "video_subtitled.srt"
    video = VideoFileClip(input_video)
    audio = AudioFileClip(input_audio)
    video = video.with_audio(audio)
    print(video)
    output_video_file = input_video_name + '_subtitled' + ".mp4"
    subtitles = pysrt.open(subtitle_path, encoding="utf-8")
    subtitle_clips, color_clips = create_subtitle_clips(subtitles, video.size, 24, 'arial-unicode-ms.ttf', 'white', False)
    # Backdrops go under the text so the captions stay readable.
    final_video = CompositeVideoClip([video] + color_clips + subtitle_clips)
    try:
        final_video.write_videofile(output_video_file, codec="libx264", audio_codec="aac", logger=None)
    finally:
        # Release the clips' readers whether or not rendering succeeded.
        final_video.close()
        audio.close()
        video.close()
    print('final')
    return output_video_file
def process_video(video, clip_type):
    """Subtitle an uploaded video end to end.

    Extracts the audio, transcribes it word by word, groups the words into
    subtitle lines, has them translated by ``enhance_text``, rewrites the
    translation as a right-to-left SRT file and renders it onto the video.

    Args:
        video: Path of the uploaded video file.
        clip_type: Value of the "Clip Type" dropdown; currently unused.

    Returns:
        Path of the subtitled output video.

    Raises:
        RuntimeError: If no translated subtitle text could be obtained.
    """
    mp3_file = extract_audio(video)
    wordlevel_info = transcribe(mp3_file)
    subtitles = create_subtitles(wordlevel_info)
    subtitle_file = generate_subtitle_file('fa', subtitles, 'video_subtitled')
    srt_string = read_srt_file(subtitle_file)
    google_translate = enhance_text(api_key, srt_string)
    if not google_translate:
        # Fail with a clear message instead of a TypeError deep inside
        # write_google when every translation attempt failed.
        raise RuntimeError("Translation failed: no subtitle text was returned")
    write_google(google_translate)
    sub = generate_translated_subtitle("fa", google_translate, "video_subtitled")
    output_video_file = video_edit(sub, video, input_audio= "audio.mp3")
    return output_video_file
# Gradio UI: upload a video, pick a clip type, press the button and get the
# subtitled video back. (An unused string-literal block holding a disabled
# second row was dropped; it had no runtime effect.)
with gr.Blocks() as demo:
    gr.Markdown("Start typing below and then click **Run** to see the output.")
    with gr.Column():
        video_file_input = gr.Video(label="Upload Video File")
        clip_type = gr.Dropdown(["auto edit", "default"], label="Clip Type")
        btn = gr.Button("create")
        video_file_output = gr.Video(label="result: ")
        btn.click(
            fn=process_video,
            inputs=[video_file_input, clip_type],
            outputs=video_file_output,
        )

demo.launch(debug=True)