yichenl5 commited on
Commit
d518333
·
2 Parent(s): d0677a4 d26fe45

Merge pull request #67 from project-kxkg/oop-refactor

Browse files

Release V1: oop refactor

Former-commit-id: ff9e71f58ad50e037e546b0ff2371a136eef26d8

.gitignore CHANGED
@@ -10,4 +10,8 @@ test.py
10
  test.srt
11
  test.txt
12
  log_*.csv
13
- log.csv
 
 
 
 
 
10
  test.srt
11
  test.txt
12
  log_*.csv
13
+ log.csv
14
+ .chroma
15
+ *.ini
16
+ local_dump/
17
+ .pytest_cache/
configs/local_launch.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # launch config for local environment
2
+ local_dump: ./local_dump
3
+ # dictionary_path: ./domain_dict
4
+ environ: local
configs/task_config.yaml ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # configuration for each task
2
+ source_lang: EN
3
+ target_lang: ZH
4
+ field: General
5
+
6
+ # ASR config
7
+ ASR:
8
+ ASR_model: whisper
9
+ whisper_config:
10
+ whisper_model: tiny
11
+ method: stable
12
+
13
+ # pre-process module config
14
+ pre_process:
15
+ sentence_form: True
16
+ spell_check: False
17
+ term_correct: True
18
+
19
+ # Translation module config
20
+ translation:
21
+ model: gpt-4
22
+ chunk_size: 1000
23
+
24
+ # post-process module config
25
+ post_process:
26
+ check_len_and_split: True
27
+ remove_trans_punctuation: True
28
+
29
+ # output type that user receive
30
+ output_type:
31
+ subtitle: srt
32
+ video: True
33
+ bilingual: True
34
+
35
+
dict_util.py CHANGED
@@ -52,4 +52,27 @@ with open("../test.csv", "w", encoding='utf-8') as w:
52
  export_csv_dict(term_dict_sc2,w)
53
 
54
  ## for load pickle, just:
55
- # pickle.load(f)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
  export_csv_dict(term_dict_sc2,w)
53
 
54
  ## for load pickle, just:
55
+ # pickle.load(f)
56
+
57
+
58
def form_dict(src_dict: list, tgt_dict: list) -> dict:
    """Build a term lookup table from two parallel CSV row lists.

    Every variant in row i of src_dict maps to the full candidate row
    tgt_dict[i]; later duplicates of a variant overwrite earlier ones.
    Raises IndexError if src_dict has more rows than tgt_dict.
    """
    return {
        variant: tgt_dict[row_idx]
        for row_idx, variants in enumerate(src_dict)
        for variant in variants
    }


class term_dict(dict):
    """Domain glossary mapping a source-language term to the list of its
    target-language translation candidates, loaded from
    <path>/<src_lang>.csv and <path>/<tgt_lang>.csv (row-aligned files)."""

    def __init__(self, path, src_lang, tgt_lang) -> None:
        with open(f"{path}/{src_lang}.csv", 'r', encoding="utf-8") as file:
            src_rows = list(csv.reader(file, delimiter=","))
        with open(f"{path}/{tgt_lang}.csv", 'r', encoding="utf-8") as file:
            tgt_rows = list(csv.reader(file, delimiter=","))
        super().__init__(form_dict(src_rows, tgt_rows))

    def get(self, key: str) -> str:
        """Return one randomly chosen translation candidate for *key*.

        NOTE(review): unlike dict.get, this raises KeyError when *key*
        is absent instead of returning a default — confirm callers rely
        on that.
        """
        candidates = self[key]
        return candidates[randint(0, len(candidates) - 1)]
78
+
domain_dict/SC2/EN.csv ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ barracks
2
+ zerg
3
+ protoss
4
+ terran
5
+ engineering bay,engin bay
6
+ forge
7
+ blink
8
+ evolution chamber
9
+ cybernetics core,cybercore
10
+ enhanced shockwaves
11
+ gravitic boosters
12
+ armory
13
+ robotics bay,robo bay
14
+ twilight council,twilight
15
+ fusion core
16
+ fleet beacon
17
+ factory
18
+ ghost academy
19
+ infestation pit
20
+ robotics facility,robo
21
+ stargate
22
+ starport
23
+ archon
24
+ smart servos
25
+ gateway
26
+ warpgate
27
+ immortal
28
+ zealot
29
+ nydus network
30
+ nydus worm
31
+ hydralisk,hydra
32
+ grooved spines
33
+ muscular augments
34
+ hydralisk den,hydra den
35
+ planetary fortress
36
+ battle cruiser
37
+ weapon refit
38
+ brood lord
39
+ broodling
40
+ greater spire
41
+ anabolic synthesis
42
+ cyclone
43
+ bunker
domain_dict/SC2/ZH.csv ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 兵营
2
+ 虫族
3
+ 神族
4
+ 人族
5
+ 工程站,BE
6
+ BF,锻炉
7
+ 闪现
8
+ 进化腔
9
+ BY,赛博核心
10
+ EMP范围
11
+ ob速度
12
+ 军械库
13
+ 机械研究所,VB
14
+ 光影议会,VC
15
+ 聚变芯体
16
+ 舰队航标
17
+ 重工厂
18
+ 幽灵军校
19
+ 感染深渊
20
+ VR,机械台
21
+ 神族VS,星门
22
+ 星港,人族VS
23
+ 白球
24
+ 变形加速
25
+ 传送门
26
+ 折跃门
27
+ 不朽
28
+ 叉叉
29
+ 虫道网络
30
+ 坑道虫
31
+ 刺蛇
32
+ 刺蛇射程
33
+ 刺蛇速度
34
+ 刺蛇塔
35
+ 大地堡,行星要塞
36
+ 大和
37
+ 大和炮
38
+ 大龙
39
+ 巢虫
40
+ 大龙塔
41
+ 大牛速度
42
+ 导弹车
43
+ 地堡
entries/__init_lib_path.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
import sys
import os


def add_path(custom_path):
    """Prepend *custom_path* to sys.path unless it is already listed."""
    already_present = custom_path in sys.path
    if not already_present:
        sys.path.insert(0, custom_path)


# Make the repository root (parent of this entries/ directory) importable
# so entry scripts can do `from src... import ...`.
this_dir = os.path.dirname(__file__)

lib_path = os.path.join(this_dir, '..')
add_path(lib_path)
entries/app.py ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import __init_lib_path
2
+ import gradio as gr
3
+ from src.task import Task
4
+ import logging
5
+ from yaml import Loader, Dumper, load, dump
6
+ import os
7
+ from pathlib import Path
8
+ from datetime import datetime
9
+ import shutil
10
+ from uuid import uuid4
11
+
12
+ launch_config = "./configs/local_launch.yaml"
13
+ task_config = './configs/task_config.yaml'
14
+
15
def init(output_type, src_lang, tgt_lang, domain):
    """Prepare a fresh task workspace from the web-UI selections.

    :param output_type: list of checked output options ("Video File",
        "Bilingual", ".ass output")
    :param src_lang: source language code (e.g. "EN")
    :param tgt_lang: target language code (e.g. "ZH")
    :param domain: term-dictionary domain (e.g. "General", "SC2")
    :return: (task_id, task_dir, task_cfg) — a uuid string, the per-task
        working directory (Path), and the merged task config dict
    """
    # Read configs with context managers; the original `load(open(...))`
    # leaked the file handles.
    with open(launch_config) as f:
        launch_cfg = load(f, Loader=Loader)
    with open(task_config) as f:
        task_cfg = load(f, Loader=Loader)

    # Overwrite config defaults with the user's selections.
    task_cfg["source_lang"] = src_lang
    task_cfg["target_lang"] = tgt_lang
    task_cfg["field"] = domain

    task_cfg["output_type"]["video"] = "Video File" in output_type
    task_cfg["output_type"]["bilingual"] = "Bilingual" in output_type
    task_cfg["output_type"]["subtitle"] = "ass" if ".ass output" in output_type else "srt"

    # Initialize the local dump directory (first launch only).
    local_dir = Path(launch_cfg['local_dump'])
    if not local_dir.exists():
        local_dir.mkdir(parents=False, exist_ok=False)

    # Get a unique task id.
    task_id = str(uuid4())

    # Create the local dir for the task; exist_ok=False so a uuid
    # collision fails loudly instead of reusing a directory.
    task_dir = local_dir.joinpath(f"task_{task_id}")
    task_dir.mkdir(parents=False, exist_ok=False)
    task_dir.joinpath("results").mkdir(parents=False, exist_ok=False)

    # Route all logging into a per-task log file.
    logfmt = "%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s"
    logging.basicConfig(level=logging.INFO, format=logfmt, handlers=[
        logging.FileHandler(
            "{}/{}_{}.log".format(task_dir, f"task_{task_id}", datetime.now().strftime("%m%d%Y_%H%M%S")),
            'w', encoding='utf-8')])
    return task_id, task_dir, task_cfg
59
+
60
def process_input(video_file, youtube_link, src_lang, tgt_lang, domain, output_type):
    """Gradio callback: build a Task from a YouTube link (preferred) or an
    uploaded video file, run it, and return task.result.

    Returns None when neither input was provided.
    """
    task_id, task_dir, task_cfg = init(output_type, src_lang, tgt_lang, domain)
    if youtube_link:
        task = Task.fromYoutubeLink(youtube_link, task_id, task_dir, task_cfg)
    elif video_file is not None:
        task = Task.fromVideoFile(video_file, task_id, task_dir, task_cfg)
    else:
        return None
    task.run()
    return task.result
72
+
73
+ demo = gr.Interface(fn=process_input,
74
+ inputs=[
75
+ gr.components.Video(label="Upload a video"),
76
+ gr.components.Textbox(label="Or enter a YouTube URL"),
77
+ gr.components.Dropdown(choices=["EN", "ZH"], label="Select Source Language"),
78
+ gr.components.Dropdown(choices=["ZH", "EN"], label="Select Target Language"),
79
+ gr.components.Dropdown(choices=["General", "SC2"], label="Select Domain"),
80
+ gr.CheckboxGroup(["Video File", "Bilingual", ".ass output"], label="Output Settings", info="What do you want?"),
81
+ ],
82
+ outputs=[
83
+ gr.components.Video(label="Processed Video")
84
+ ],
85
+ title="ViDove: video translation toolkit demo",
86
+ description="Upload a video or enter a YouTube URL."
87
+ )
88
+
89
+ if __name__ == "__main__":
90
+ demo.launch()
entries/run.py ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import __init_lib_path
2
+ import logging
3
+ from yaml import Loader, Dumper, load, dump
4
+ from src.task import Task
5
+ import openai
6
+ import argparse
7
+ import os
8
+ from pathlib import Path
9
+ from datetime import datetime
10
+ import shutil
11
+ from uuid import uuid4
12
+
13
+ """
14
+ Main entry for terminal environment.
15
+ Use it for debug and development purpose.
16
+ Usage: python3 entries/run.py [-h] [--link LINK] [--video_file VIDEO_FILE] [--audio_file AUDIO_FILE] [--srt_file SRT_FILE] [--continue CONTINUE]
17
+ [--launch_cfg LAUNCH_CFG] [--task_cfg TASK_CFG]
18
+ """
19
+
20
def parse_args():
    """Build and parse the command-line arguments for the terminal entry."""
    parser = argparse.ArgumentParser()
    # All plain-string inputs share the same defaults, so declare them
    # in a table instead of repeating the call five times.
    string_inputs = [
        ("--link", "youtube video link here"),
        ("--video_file", "local video path here"),
        ("--audio_file", "local audio path here"),
        ("--srt_file", "srt file input path here"),
        ("--continue", "task_id that need to continue"),  # need implement
    ]
    for flag, help_text in string_inputs:
        parser.add_argument(flag, help=help_text, default=None, type=str, required=False)
    parser.add_argument("--launch_cfg", help="launch config path",
                        default='./configs/local_launch.yaml', type=str, required=False)
    parser.add_argument("--task_cfg", help="task config path",
                        default='./configs/task_config.yaml', type=str, required=False)
    return parser.parse_args()
32
+
33
if __name__ == "__main__":
    # Read args and configs; context managers close the config files
    # (the original `load(open(...))` leaked the handles).
    args = parse_args()
    with open(args.launch_cfg) as f:
        launch_cfg = load(f, Loader=Loader)
    with open(args.task_cfg) as f:
        task_cfg = load(f, Loader=Loader)

    # Initialize the local dump directory (first run only).
    local_dir = Path(launch_cfg['local_dump'])
    if not local_dir.exists():
        local_dir.mkdir(parents=False, exist_ok=False)

    # Get a unique task id.
    task_id = str(uuid4())

    # Create the local dir for the task; exist_ok=False so a uuid
    # collision fails loudly instead of reusing a directory.
    task_dir = local_dir.joinpath(f"task_{task_id}")
    task_dir.mkdir(parents=False, exist_ok=False)
    task_dir.joinpath("results").mkdir(parents=False, exist_ok=False)

    # Route all logging into a per-task log file.
    logfmt = "%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s"
    logging.basicConfig(level=logging.INFO, format=logfmt, handlers=[
        logging.FileHandler(
            "{}/{}_{}.log".format(task_dir, f"task_{task_id}", datetime.now().strftime("%m%d%Y_%H%M%S")),
            'w', encoding='utf-8')])

    # Task creation. Fixes vs. the original: narrow `except Exception`
    # instead of bare `except:`; one error message that names the actual
    # input kind (the original said "youtube link" in every branch); an
    # explicit error when no input flag was given (previously `task` was
    # undefined and `task.run()` raised NameError). The task dir is
    # removed on failure so empty task_* directories are not left behind.
    try:
        if args.link is not None:
            task = Task.fromYoutubeLink(args.link, task_id, task_dir, task_cfg)
        elif args.video_file is not None:
            task = Task.fromVideoFile(args.video_file, task_id, task_dir, task_cfg)
        elif args.audio_file is not None:
            # NOTE(review): audio input also goes through fromVideoFile —
            # confirm Task has no dedicated audio constructor.
            task = Task.fromVideoFile(args.audio_file, task_id, task_dir, task_cfg)
        else:
            raise ValueError("no input provided: pass --link, --video_file or --audio_file")
    except Exception as e:
        shutil.rmtree(task_dir)
        raise RuntimeError(f"failed to create task from input: {e}") from e

    # Run the task pipeline.
    task.run()
entries/web_backend.py ADDED
File without changes
requirement.txt CHANGED
@@ -5,6 +5,7 @@ attrs==22.2.0
5
  certifi==2022.12.7
6
  charset-normalizer==3.1.0
7
  ffmpeg-python==0.2.0
 
8
  filelock==3.10.0
9
  frozenlist==1.3.3
10
  future==0.18.3
@@ -23,12 +24,13 @@ openai-whisper @ git+https://github.com/openai/whisper.git@6dea21fd7f7253bfe450f
23
  panda==0.3.1
24
  pandas==1.5.3
25
  python-dateutil==2.8.2
26
- pytube==12.1.2
27
  pytube3==9.6.4
28
  pytz==2022.7.1
29
  regex==2022.10.31
30
  requests==2.28.2
31
  six==1.16.0
 
32
  sympy==1.11.1
33
  tiktoken==0.3.1
34
  torch==2.0.0
 
5
  certifi==2022.12.7
6
  charset-normalizer==3.1.0
7
  ffmpeg-python==0.2.0
8
+ Flask==2.3.3
9
  filelock==3.10.0
10
  frozenlist==1.3.3
11
  future==0.18.3
 
24
  panda==0.3.1
25
  pandas==1.5.3
26
  python-dateutil==2.8.2
27
+ pytube==15.0.0
28
  pytube3==9.6.4
29
  pytz==2022.7.1
30
  regex==2022.10.31
31
  requests==2.28.2
32
  six==1.16.0
33
+ stable-ts==2.9.0
34
  sympy==1.11.1
35
  tiktoken==0.3.1
36
  torch==2.0.0
src/Pigeon.py CHANGED
@@ -317,7 +317,7 @@ class Pigeon(object):
317
  logging.info("--------------------Start Preprocessing SRT class--------------------")
318
  self.srt.write_srt_file_src(self.srt_path)
319
  self.srt.form_whole_sentence()
320
- self.srt.spell_check_term()
321
  self.srt.correct_with_force_term()
322
  processed_srt_file_en = str(Path(self.srt_path).with_suffix('')) + '_processed.srt'
323
  self.srt.write_srt_file_src(processed_srt_file_en)
 
317
  logging.info("--------------------Start Preprocessing SRT class--------------------")
318
  self.srt.write_srt_file_src(self.srt_path)
319
  self.srt.form_whole_sentence()
320
+ # self.srt.spell_check_term()
321
  self.srt.correct_with_force_term()
322
  processed_srt_file_en = str(Path(self.srt_path).with_suffix('')) + '_processed.srt'
323
  self.srt.write_srt_file_src(processed_srt_file_en)
src/preprocess/audio_extract.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pathlib
2
+ import os
3
+ import subprocess
4
+
5
+
6
def extract_audio(local_video_path: str, save_dir_path: str = "./downloads/audio") -> str:
    """Extract the audio track of a local video to an mp3 file via ffmpeg.

    :param local_video_path: path of the input video file
    :param save_dir_path: directory the extracted audio is written to
    :return: path of the extracted audio file
    :raises NotImplementedError: on Windows (filename handling untested)
    :raises subprocess.CalledProcessError: when ffmpeg fails
    """
    if os.name == 'nt':
        # The original constructed NotImplementedError but forgot to
        # `raise` it, silently falling through on Windows.
        raise NotImplementedError("Filename extraction on Windows not yet implemented")

    os.makedirs(save_dir_path, exist_ok=True)
    out_file_name = os.path.basename(local_video_path)
    # The original used str.join (`save_dir_path.join("/")...`), which
    # interleaves characters and produced a garbage path; os.path.join
    # is the intended operation.
    # NOTE(review): the output keeps the input's extension (e.g. .mp4)
    # even though the content is mp3 — confirm downstream expects that.
    audio_path_out = os.path.join(save_dir_path, out_file_name)
    subprocess.run(
        ['ffmpeg', '-i', local_video_path, '-f', 'mp3', '-ab', '192000', '-vn', audio_path_out],
        check=True)  # check=True: fail loudly instead of returning a path to a missing file
    return audio_path_out
src/preprocess/video_download.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pytube import YouTube
2
+ import logging
3
+
4
def download_youtube_to_local_file(youtube_url: str, local_dir_path: str = "./downloads") -> str:
    """Download the highest-bitrate audio-only mp4 stream of a YouTube video.

    :param youtube_url: link of the video to download
    :param local_dir_path: base download directory; audio lands in <dir>/audio
    :return: local path of the downloaded audio file
    :raises RuntimeError: when the video has no audio-only mp4 stream
    """
    yt = YouTube(youtube_url)
    try:
        audio = yt.streams.filter(only_audio=True, file_extension='mp4').order_by('abr').desc().first()
        # video = yt.streams.filter(file_extension='mp4').order_by('resolution').asc().first()
        if audio is None:
            logging.error(f"Audio stream not found in {youtube_url}")
            # The original did `raise f"..."`, which raises a str and is
            # itself a TypeError; raise a real exception instead.
            raise RuntimeError(f"Audio stream not found in {youtube_url}")
        # The original used str.join (`local_dir_path.join("/audio")`),
        # which produced a garbage directory path.
        saved_audio = audio.download(output_path=f"{local_dir_path}/audio")
        logging.info(f"Audio download successful: {saved_audio}")
        return saved_audio
    except Exception:
        # Log with traceback instead of bare print, then propagate.
        logging.exception(f"YouTube download failed for {youtube_url}")
        raise
20
+
src/srt_util/srt.py CHANGED
@@ -7,10 +7,59 @@ from datetime import timedelta
7
  import logging
8
  import openai
9
  from tqdm import tqdm
10
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
 
12
  class SrtSegment(object):
13
- def __init__(self, *args) -> None:
 
 
 
14
  if isinstance(args[0], dict):
15
  segment = args[0]
16
  self.start = segment['start']
@@ -54,6 +103,7 @@ class SrtSegment(object):
54
  self.translation = ""
55
  else:
56
  self.translation = args[0][3]
 
57
 
58
  def merge_seg(self, seg):
59
  """
@@ -83,12 +133,14 @@ class SrtSegment(object):
83
 
84
  def remove_trans_punc(self) -> None:
85
  """
86
- remove CN punctuations in translation text
87
  :return: None
88
  """
89
- punc_cn = ",。!?"
90
- translator = str.maketrans(punc_cn, ' ' * len(punc_cn))
91
- self.translation = self.translation.translate(translator)
 
 
92
 
93
  def __str__(self) -> str:
94
  return f'{self.duration}\n{self.source_text}\n\n'
@@ -101,11 +153,25 @@ class SrtSegment(object):
101
 
102
 
103
  class SrtScript(object):
104
- def __init__(self, segments) -> None:
105
- self.segments = [SrtSegment(seg) for seg in segments]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
106
 
107
  @classmethod
108
- def parse_from_srt_file(cls, path: str):
109
  with open(path, 'r', encoding="utf-8") as f:
110
  script_lines = [line.rstrip() for line in f.readlines()]
111
  bilingual = False
@@ -119,7 +185,7 @@ class SrtScript(object):
119
  for i in range(0, len(script_lines), 4):
120
  segments.append(list(script_lines[i:i + 4]))
121
 
122
- return cls(segments)
123
 
124
  def merge_segs(self, idx_list) -> SrtSegment:
125
  """
@@ -147,9 +213,10 @@ class SrtScript(object):
147
  logging.info("Forming whole sentences...")
148
  merge_list = [] # a list of indices that should be merged e.g. [[0], [1, 2, 3, 4], [5, 6], [7]]
149
  sentence = []
 
150
  # Get each entire sentence of distinct segments, fill indices to merge_list
151
  for i, seg in enumerate(self.segments):
152
- if seg.source_text[-1] in ['.', '!', '?'] and len(seg.source_text) > 10 and 'vs.' not in seg.source_text:
153
  sentence.append(i)
154
  merge_list.append(sentence)
155
  sentence = []
@@ -184,19 +251,20 @@ class SrtScript(object):
184
  src_text += '\n\n'
185
 
186
  def inner_func(target, input_str):
 
187
  response = openai.ChatCompletion.create(
188
- # model=model,
189
  model="gpt-4",
190
  messages=[
191
  {"role": "system",
192
- "content": "你的任务是按照要求合并或拆分句子到指定行数,你需要尽可能保证句意,但必要时可以将一句话分为两行输出"},
193
- {"role": "system", "content": "注意:你只需要输出处理过的中文句子,如果你要输出序号,请使用冒号隔开"},
194
- {"role": "user", "content": '请将下面的句子拆分或组合为{}句:\n{}'.format(target, input_str)}
195
  ],
196
  temperature=0.15
197
  )
198
  return response['choices'][0]['message']['content'].strip()
199
 
 
200
  lines = translate.split('\n\n')
201
  if len(lines) < (end_seg_id - start_seg_id + 1):
202
  count = 0
@@ -204,28 +272,27 @@ class SrtScript(object):
204
  while count < 5 and len(lines) != (end_seg_id - start_seg_id + 1):
205
  count += 1
206
  print("Solving Unmatched Lines|iteration {}".format(count))
 
207
 
208
  flag = True
209
  while flag:
210
  flag = False
211
- # print("translate:")
212
- # print(translate)
213
  try:
214
- # print("target")
215
- # print(end_seg_id - start_seg_id + 1)
216
  translate = inner_func(end_seg_id - start_seg_id + 1, translate)
217
  except Exception as e:
218
  print("An error has occurred during solving unmatched lines:", e)
219
  print("Retrying...")
 
 
220
  flag = True
221
  lines = translate.split('\n')
222
- # print("result")
223
- # print(len(lines))
224
 
225
  if len(lines) < (end_seg_id - start_seg_id + 1):
226
  solved = False
227
  print("Failed Solving unmatched lines, Manually parse needed")
 
228
 
 
229
  if not os.path.exists("./logs"):
230
  os.mkdir("./logs")
231
  if video_link:
@@ -244,7 +311,7 @@ class SrtScript(object):
244
  log.write("range_of_text,iterations_solving,solved,file_length,video_name" + "\n")
245
  log.write(str(id_range) + ',' + str(count) + ',' + str(solved) + ',' + str(
246
  len(self.segments)) + ',' + video_name + "\n")
247
- print(lines)
248
 
249
  for i, seg in enumerate(self.segments[start_seg_id - 1:end_seg_id]):
250
  # naive way to due with merge translation problem
@@ -262,24 +329,27 @@ class SrtScript(object):
262
 
263
  def split_seg(self, seg, text_threshold, time_threshold):
264
  # evenly split seg to 2 parts and add new seg into self.segments
265
-
266
  # ignore the initial comma to solve the recursion problem
 
 
 
267
  if len(seg.source_text) > 2:
268
- if seg.source_text[:2] == ', ':
269
  seg.source_text = seg.source_text[2:]
270
- if seg.translation[0] == ',':
271
  seg.translation = seg.translation[1:]
272
 
273
  source_text = seg.source_text
274
  translation = seg.translation
275
 
276
  # split the text based on commas
277
- src_commas = [m.start() for m in re.finditer(',', source_text)]
278
- trans_commas = [m.start() for m in re.finditer(',', translation)]
279
  if len(src_commas) != 0:
280
  src_split_idx = src_commas[len(src_commas) // 2] if len(src_commas) % 2 == 1 else src_commas[
281
  len(src_commas) // 2 - 1]
282
  else:
 
283
  src_space = [m.start() for m in re.finditer(' ', source_text)]
284
  if len(src_space) > 0:
285
  src_split_idx = src_space[len(src_space) // 2] if len(src_space) % 2 == 1 else src_space[
@@ -315,14 +385,14 @@ class SrtScript(object):
315
  seg1_dict['text'] = src_seg1
316
  seg1_dict['start'] = start_seg1
317
  seg1_dict['end'] = end_seg1
318
- seg1 = SrtSegment(seg1_dict)
319
  seg1.translation = trans_seg1
320
 
321
  seg2_dict = {}
322
  seg2_dict['text'] = src_seg2
323
  seg2_dict['start'] = start_seg2
324
  seg2_dict['end'] = end_seg2
325
- seg2 = SrtSegment(seg2_dict)
326
  seg2.translation = trans_seg2
327
 
328
  result_list = []
@@ -353,8 +423,6 @@ class SrtScript(object):
353
  self.segments = segments
354
  logging.info("check_len_and_split finished")
355
 
356
- pass
357
-
358
  def check_len_and_split_range(self, range, text_threshold=30, time_threshold=1.0):
359
  # DEPRECATED
360
  # if sentence length >= text_threshold, split this segments to two
@@ -376,22 +444,24 @@ class SrtScript(object):
376
  def correct_with_force_term(self):
377
  ## force term correction
378
  logging.info("performing force term correction")
379
- # load term dictionary
380
- with open("finetune_data/dict_enzh.csv", 'r', encoding='utf-8') as f:
381
- term_enzh_dict = {rows[0]: rows[1] for rows in reader(f)}
382
 
383
- keywords = list(term_enzh_dict.keys())
384
- keywords.sort(key=lambda x: len(x), reverse=True)
385
-
386
- for word in keywords:
387
- for i, seg in enumerate(self.segments):
388
- if word in seg.source_text.lower():
389
- seg.source_text = re.sub(fr"({word}es|{word}s?)\b", "{}".format(term_enzh_dict.get(word)),
390
- seg.source_text, flags=re.IGNORECASE)
391
- logging.info(
392
- "replace term: " + word + " --> " + term_enzh_dict.get(word) + " in time stamp {}".format(
393
- i + 1))
394
- logging.info("source text becomes: " + seg.source_text)
 
 
 
 
 
395
 
396
  comp_dict = []
397
 
@@ -425,6 +495,12 @@ class SrtScript(object):
425
 
426
  def spell_check_term(self):
427
  logging.info("performing spell check")
 
 
 
 
 
 
428
  import enchant
429
  dict = enchant.Dict('en_US')
430
  term_spellDict = enchant.PyPWL('./finetune_data/dict_freq.txt')
@@ -532,3 +608,27 @@ class SrtScript(object):
532
  f.write(f'{i + idx}\n')
533
  f.write(seg.get_bilingual_str())
534
  pass
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
  import logging
8
  import openai
9
  from tqdm import tqdm
10
+ import dict_util
11
+
12
# Punctuation tables for the supported language codes.
#   punc_str:     punctuation characters stripped from translated text
#                 (see SrtSegment.remove_trans_punc, which replaces each
#                 character of this string with a space)
#   comma:        the clause separator used when splitting long segments
#   sentence_end: characters that terminate a complete sentence
# NOTE(review): EN/ES/AR punc_str entries are space-separated, so the
# space itself is treated as a punctuation character there, while
# FR/DE/RU/ZH/JA are contiguous — confirm this asymmetry is intended.
punctuation_dict = {
    "EN": {
        "punc_str": ". , ? ! : ; - ( ) [ ] { }",
        "comma": ", ",
        "sentence_end": [".", "!", "?", ";"]
    },
    "ES": {
        "punc_str": ". , ? ! : ; - ( ) [ ] { } ¡ ¿",
        "comma": ", ",
        "sentence_end": [".", "!", "?", ";", "¡", "¿"]
    },
    "FR": {
        "punc_str": ".,?!:;«»—",
        "comma": ", ",
        "sentence_end": [".", "!", "?", ";"]
    },
    "DE": {
        "punc_str": ".,?!:;„“–",
        "comma": ", ",
        "sentence_end": [".", "!", "?", ";"]
    },
    "RU": {
        "punc_str": ".,?!:;-«»—",
        "comma": ", ",
        "sentence_end": [".", "!", "?", ";"]
    },
    "ZH": {
        "punc_str": "。,?!:;()",
        "comma": ",",
        "sentence_end": ["。", "!", "?"]
    },
    "JA": {
        "punc_str": "。、?!:;()",
        "comma": "、",
        "sentence_end": ["。", "!", "?"]
    },
    "AR": {
        "punc_str": ".,?!:;-()[]،؛ ؟ «»",
        "comma": "، ",
        "sentence_end": [".", "!", "?", ";", "؟"]
    },
}

# Root directory holding the per-domain term dictionaries
# (<dict_path>/<domain>/<LANG>.csv, see SrtScript.__init__).
dict_path = "./domain_dict"
57
 
58
  class SrtSegment(object):
59
+ def __init__(self, src_lang, tgt_lang, *args) -> None:
60
+ self.src_lang = src_lang
61
+ self.tgt_lang = tgt_lang
62
+
63
  if isinstance(args[0], dict):
64
  segment = args[0]
65
  self.start = segment['start']
 
103
  self.translation = ""
104
  else:
105
  self.translation = args[0][3]
106
+
107
 
108
  def merge_seg(self, seg):
109
  """
 
133
 
134
  def remove_trans_punc(self) -> None:
135
  """
136
+ remove punctuations in translation text
137
  :return: None
138
  """
139
+ punc_str = punctuation_dict[self.tgt_lang]["punc_str"]
140
+ for punc in punc_str:
141
+ self.translation = self.translation.replace(punc, ' ')
142
+ # translator = str.maketrans(punc, ' ' * len(punc))
143
+ # self.translation = self.translation.translate(translator)
144
 
145
  def __str__(self) -> str:
146
  return f'{self.duration}\n{self.source_text}\n\n'
 
153
 
154
 
155
  class SrtScript(object):
156
+ def __init__(self, src_lang, tgt_lang, segments, domain="General") -> None:
157
+ self.domain = domain
158
+ self.src_lang = src_lang
159
+ self.tgt_lang = tgt_lang
160
+ self.segments = [SrtSegment(self.src_lang, self.tgt_lang, seg) for seg in segments]
161
+
162
+ if self.domain != "General":
163
+ if os.path.exists(f"{dict_path}/{self.domain}") and\
164
+ os.path.exists(f"{dict_path}/{self.domain}/{src_lang}.csv") and os.path.exists(f"{dict_path}/{self.domain}/{tgt_lang}.csv" ):
165
+ # TODO: load dictionary
166
+ self.dict = dict_util.term_dict(f"{dict_path}/{self.domain}", src_lang, tgt_lang)
167
+ ...
168
+ else:
169
+ logging.error(f"domain {self.domain} or related dictionary({src_lang} or {tgt_lang}) doesn't exist, fallback to general domain, this will disable correct_with_force_term and spell_check_term")
170
+ self.domain = "General"
171
+
172
 
173
  @classmethod
174
+ def parse_from_srt_file(cls, src_lang, tgt_lang, path: str):
175
  with open(path, 'r', encoding="utf-8") as f:
176
  script_lines = [line.rstrip() for line in f.readlines()]
177
  bilingual = False
 
185
  for i in range(0, len(script_lines), 4):
186
  segments.append(list(script_lines[i:i + 4]))
187
 
188
+ return cls(src_lang, tgt_lang, segments)
189
 
190
  def merge_segs(self, idx_list) -> SrtSegment:
191
  """
 
213
  logging.info("Forming whole sentences...")
214
  merge_list = [] # a list of indices that should be merged e.g. [[0], [1, 2, 3, 4], [5, 6], [7]]
215
  sentence = []
216
+ ending_puncs = punctuation_dict[self.src_lang]["sentence_end"]
217
  # Get each entire sentence of distinct segments, fill indices to merge_list
218
  for i, seg in enumerate(self.segments):
219
+ if seg.source_text[-1] in ending_puncs and len(seg.source_text) > 10 and 'vs.' not in seg.source_text:
220
  sentence.append(i)
221
  merge_list.append(sentence)
222
  sentence = []
 
251
  src_text += '\n\n'
252
 
253
  def inner_func(target, input_str):
254
+ # handling merge sentences issue.
255
  response = openai.ChatCompletion.create(
 
256
  model="gpt-4",
257
  messages=[
258
  {"role": "system",
259
+ "content": "Your task is to merge or split sentences into a specified number of lines as required. You need to ensure the meaning of the sentences as much as possible, but when necessary, a sentence can be divided into two lines for output"},
260
+ {"role": "system", "content": "Note: You only need to output the processed {} sentences. If you need to output a sequence number, please separate it with a colon.".format(self.tgt_lang)},
261
+ {"role": "user", "content": 'Please split or combine the following sentences into {} sentences:\n{}'.format(target, input_str)}
262
  ],
263
  temperature=0.15
264
  )
265
  return response['choices'][0]['message']['content'].strip()
266
 
267
+ # handling merge sentences issue.
268
  lines = translate.split('\n\n')
269
  if len(lines) < (end_seg_id - start_seg_id + 1):
270
  count = 0
 
272
  while count < 5 and len(lines) != (end_seg_id - start_seg_id + 1):
273
  count += 1
274
  print("Solving Unmatched Lines|iteration {}".format(count))
275
+ logging.error("Solving Unmatched Lines|iteration {}".format(count))
276
 
277
  flag = True
278
  while flag:
279
  flag = False
 
 
280
  try:
 
 
281
  translate = inner_func(end_seg_id - start_seg_id + 1, translate)
282
  except Exception as e:
283
  print("An error has occurred during solving unmatched lines:", e)
284
  print("Retrying...")
285
+ logging.error("An error has occurred during solving unmatched lines:", e)
286
+ logging.error("Retrying...")
287
  flag = True
288
  lines = translate.split('\n')
 
 
289
 
290
  if len(lines) < (end_seg_id - start_seg_id + 1):
291
  solved = False
292
  print("Failed Solving unmatched lines, Manually parse needed")
293
+ logging.error("Failed Solving unmatched lines, Manually parse needed")
294
 
295
+ # FIXME: put the error log in our log file
296
  if not os.path.exists("./logs"):
297
  os.mkdir("./logs")
298
  if video_link:
 
311
  log.write("range_of_text,iterations_solving,solved,file_length,video_name" + "\n")
312
  log.write(str(id_range) + ',' + str(count) + ',' + str(solved) + ',' + str(
313
  len(self.segments)) + ',' + video_name + "\n")
314
+ # print(lines)
315
 
316
  for i, seg in enumerate(self.segments[start_seg_id - 1:end_seg_id]):
317
  # naive way to due with merge translation problem
 
329
 
330
  def split_seg(self, seg, text_threshold, time_threshold):
331
  # evenly split seg to 2 parts and add new seg into self.segments
 
332
  # ignore the initial comma to solve the recursion problem
333
+ src_comma_str = punctuation_dict[self.src_lang]["comma"]
334
+ tgt_comma_str = punctuation_dict[self.tgt_lang]["comma"]
335
+
336
  if len(seg.source_text) > 2:
337
+ if seg.source_text[:2] == src_comma_str:
338
  seg.source_text = seg.source_text[2:]
339
+ if seg.translation[0] == tgt_comma_str:
340
  seg.translation = seg.translation[1:]
341
 
342
  source_text = seg.source_text
343
  translation = seg.translation
344
 
345
  # split the text based on commas
346
+ src_commas = [m.start() for m in re.finditer(src_comma_str, source_text)]
347
+ trans_commas = [m.start() for m in re.finditer(tgt_comma_str, translation)]
348
  if len(src_commas) != 0:
349
  src_split_idx = src_commas[len(src_commas) // 2] if len(src_commas) % 2 == 1 else src_commas[
350
  len(src_commas) // 2 - 1]
351
  else:
352
+ # split the text based on spaces
353
  src_space = [m.start() for m in re.finditer(' ', source_text)]
354
  if len(src_space) > 0:
355
  src_split_idx = src_space[len(src_space) // 2] if len(src_space) % 2 == 1 else src_space[
 
385
  seg1_dict['text'] = src_seg1
386
  seg1_dict['start'] = start_seg1
387
  seg1_dict['end'] = end_seg1
388
+ seg1 = SrtSegment(self.src_lang, self.tgt_lang, seg1_dict)
389
  seg1.translation = trans_seg1
390
 
391
  seg2_dict = {}
392
  seg2_dict['text'] = src_seg2
393
  seg2_dict['start'] = start_seg2
394
  seg2_dict['end'] = end_seg2
395
+ seg2 = SrtSegment(self.src_lang, self.tgt_lang, seg2_dict)
396
  seg2.translation = trans_seg2
397
 
398
  result_list = []
 
423
  self.segments = segments
424
  logging.info("check_len_and_split finished")
425
 
 
 
426
  def check_len_and_split_range(self, range, text_threshold=30, time_threshold=1.0):
427
  # DEPRECATED
428
  # if sentence length >= text_threshold, split this segments to two
 
444
  def correct_with_force_term(self):
445
  ## force term correction
446
  logging.info("performing force term correction")
 
 
 
447
 
448
+ # check domain
449
+ if self.domain == "General":
450
+ logging.info("General domain could not perform correct_with_force_term. skip this step.")
451
+ pass
452
+ else:
453
+ keywords = list(self.dict.keys())
454
+ keywords.sort(key=lambda x: len(x), reverse=True)
455
+
456
+ for word in keywords:
457
+ for i, seg in enumerate(self.segments):
458
+ if word in seg.source_text.lower():
459
+ seg.source_text = re.sub(fr"({word}es|{word}s?)\b", "{}".format(self.dict.get(word)),
460
+ seg.source_text, flags=re.IGNORECASE)
461
+ logging.info(
462
+ "replace term: " + word + " --> " + self.dict.get(word) + " in time stamp {}".format(
463
+ i + 1))
464
+ logging.info("source text becomes: " + seg.source_text)
465
 
466
  comp_dict = []
467
 
 
495
 
496
  def spell_check_term(self):
497
  logging.info("performing spell check")
498
+
499
+ # check domain
500
+ if self.domain == "General":
501
+ logging.info("General domain could not perform spell_check_term. skip this step.")
502
+ pass
503
+
504
  import enchant
505
  dict = enchant.Dict('en_US')
506
  term_spellDict = enchant.PyPWL('./finetune_data/dict_freq.txt')
 
608
  f.write(f'{i + idx}\n')
609
  f.write(seg.get_bilingual_str())
610
  pass
611
+
612
def split_script(script_in, chunk_size=1000):
    """Split a blank-line-separated script into chunks of at most roughly
    ``chunk_size`` characters.

    Returns (script_arr, range_arr): the chunk texts (stripped) and, for
    each chunk, a 1-based (start, end) tuple describing which sentences
    it covers. The two lists always have equal length.
    """
    sentences = script_in.split('\n\n')
    script_arr = []
    range_arr = []
    start, end = 1, 0
    buffer = ""
    for sentence in sentences:
        fits = len(buffer) + len(sentence) + 1 <= chunk_size
        if fits:
            buffer += sentence + '\n\n'
        else:
            # flush the current chunk and start a new one with this sentence
            range_arr.append((start, end))
            script_arr.append(buffer.strip())
            start = end + 1
            buffer = sentence + '\n\n'
        end += 1
    if buffer.strip():
        script_arr.append(buffer.strip())
        range_arr.append((start, len(sentences) - 1))

    assert len(script_arr) == len(range_arr)
    return script_arr, range_arr
src/task.py ADDED
@@ -0,0 +1,320 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import threading
2
+ import time
3
+
4
+ import openai
5
+ from pytube import YouTube
6
+ from os import getenv, getcwd
7
+ from pathlib import Path
8
+ from enum import Enum, auto
9
+ import logging
10
+ import subprocess
11
+ from src.srt_util.srt import SrtScript
12
+ from src.srt_util.srt2ass import srt2ass
13
+ from time import time, strftime, gmtime, sleep
14
+ from src.translators.translation import get_translation, prompt_selector
15
+
16
+ import torch
17
+ import stable_whisper
18
+ import shutil
19
+
20
+ """
21
+ Youtube link
22
+ - link
23
+ - model
24
+ - output type
25
+
26
+ Video file
27
+ - path
28
+ - model
29
+ - output type
30
+
31
+ Audio file
32
+ - path
33
+ - model
34
+ - output type
35
+
36
+ """
37
+ """
38
+ TaskID
39
+ Progress: Enum
40
+ Computing resrouce status
41
+ SRT_Script : SrtScript
42
+ - input module -> initialize (ASR module)
43
+ - Pre-process
44
+ - Translation (%)
45
+ - Post process (time stamp)
46
+ - Output module: SRT_Script --> output(.srt)
47
+ - (Optional) mp4
48
+ """
49
+
50
class TaskStatus(str, Enum):
    # Lifecycle states of a Task, listed in pipeline order.
    # Subclassing str keeps members JSON-serializable as plain strings
    # (the web status endpoint returns task.status directly).
    CREATED = 'CREATED'
    INITIALIZING_ASR = 'INITIALIZING_ASR'
    PRE_PROCESSING = 'PRE_PROCESSING'
    TRANSLATING = 'TRANSLATING'
    POST_PROCESSING = 'POST_PROCESSING'
    OUTPUT_MODULE = 'OUTPUT_MODULE'
57
+
58
+
59
class Task:
    """
    One end-to-end subtitle-translation job: ASR -> preprocess ->
    translation -> postprocess -> output rendering.

    Use the fromYoutubeLink / fromAudioFile / fromVideoFile factories
    rather than constructing subclasses directly.
    """

    @property
    def status(self):
        # Thread-safe read: the web layer polls status while the worker
        # thread advances the pipeline.
        with self.__status_lock:
            return self.__status

    @status.setter
    def status(self, new_status):
        with self.__status_lock:
            self.__status = new_status

    def __init__(self, task_id, task_local_dir, task_cfg):
        """
        :param task_id: unique identifier (uuid string) for this task.
        :param task_local_dir: Path-like working directory for all task files.
        :param task_cfg: dict with ASR / translation / pre_process /
            post_process / output_type / language settings (see task_config.yaml).
        """
        self.__status_lock = threading.Lock()
        self.__status = TaskStatus.CREATED
        self.gpu_status = 0
        openai.api_key = getenv("OPENAI_API_KEY")
        self.task_id = task_id

        self.task_local_dir = task_local_dir
        self.ASR_setting = task_cfg["ASR"]
        self.translation_setting = task_cfg["translation"]
        self.translation_model = self.translation_setting["model"]

        self.output_type = task_cfg["output_type"]
        self.target_lang = task_cfg["target_lang"]
        self.source_lang = task_cfg["source_lang"]
        self.field = task_cfg["field"]
        self.pre_setting = task_cfg["pre_process"]
        self.post_setting = task_cfg["post_process"]

        self.audio_path = None
        # BUG FIX: output_render() reads self.video_path, but only the
        # subclasses used to define it; define a safe default in the base.
        self.video_path = None
        self.SRT_Script = None
        self.result = None
        # BUG FIX: was "self.s_t", but get_srt_class()/output_render() use
        # self.t_s for the pipeline start time.
        self.t_s = None
        self.t_e = None

        print(f"Task ID: {self.task_id}")
        logging.info(f"Task ID: {self.task_id}")
        logging.info(f"{self.source_lang} -> {self.target_lang} task in {self.field}")
        logging.info(f"Translation Model: {self.translation_model}")
        logging.info(f"subtitle_type: {self.output_type['subtitle']}")
        logging.info(f"video_ouput: {self.output_type['video']}")
        logging.info(f"bilingual_ouput: {self.output_type['bilingual']}")
        logging.info("Pre-process setting:")
        for key in self.pre_setting:
            logging.info(f"{key}: {self.pre_setting[key]}")
        logging.info("Post-process setting:")
        for key in self.post_setting:
            logging.info(f"{key}: {self.post_setting[key]}")

    @staticmethod
    def fromYoutubeLink(youtube_url, task_id, task_dir, task_cfg):
        """Create a task that downloads its media from a YouTube URL."""
        logging.info("Task Creation method: Youtube Link")
        return YoutubeTask(task_id, task_dir, task_cfg, youtube_url)

    @staticmethod
    def fromAudioFile(audio_path, task_id, task_dir, task_cfg):
        """Create a task from a local audio file (no video output possible)."""
        logging.info("Task Creation method: Audio File")
        return AudioTask(task_id, task_dir, task_cfg, audio_path)

    @staticmethod
    def fromVideoFile(video_path, task_id, task_dir, task_cfg):
        """Create a task from a local video file; audio is extracted later."""
        logging.info("Task Creation method: Video File")
        return VideoTask(task_id, task_dir, task_cfg, video_path)

    # Module 1 ASR: audio --> SRT_script
    def get_srt_class(self):
        """Transcribe self.audio_path with Whisper and build self.SRT_Script."""
        # TODO: setup ASR module like translator
        self.status = TaskStatus.INITIALIZING_ASR
        self.t_s = time()

        method = self.ASR_setting["whisper_config"]["method"]
        whisper_model = self.ASR_setting["whisper_config"]["whisper_model"]
        src_srt_path = self.task_local_dir.joinpath(f"task_{self.task_id}_{self.source_lang}.srt")
        if not Path.exists(src_srt_path):
            # extract script from audio
            logging.info("extract script from audio")
            device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

            if method == "api":
                # NOTE(review): the API path returns srt-formatted text, not a
                # dict with 'segments' — confirm downstream handling.
                with open(self.audio_path, 'rb') as audio_file:
                    transcript = openai.Audio.transcribe(model="whisper-1", file=audio_file, response_format="srt")
            elif method == "stable":
                model = stable_whisper.load_model(whisper_model, device)
                transcript = model.transcribe(str(self.audio_path), regroup=False,
                                              initial_prompt="Hello, welcome to my lecture. Are you good my friend?")
                # Regroup word timestamps into sentence-like segments.
                (
                    transcript
                    .split_by_punctuation(['.', '。', '?'])
                    .merge_by_gap(.15, max_words=3)
                    .merge_by_punctuation([' '])
                    .split_by_punctuation(['.', '。', '?'])
                )
                transcript = transcript.to_dict()

            # after get the transcript, release the gpu resource
            torch.cuda.empty_cache()

        # NOTE(review): if the source .srt already exists, `transcript` is
        # undefined here — presumably tasks always start from fresh audio;
        # confirm and add an srt-loading branch if re-runs are expected.
        self.SRT_Script = SrtScript(self.source_lang, self.target_lang, transcript['segments'], self.field)
        # save the srt script to local
        self.SRT_Script.write_srt_file_src(src_srt_path)

    # Module 2: SRT preprocess: perform preprocess steps
    def preprocess(self):
        """Run the configured preprocessing passes on the SRT script."""
        self.status = TaskStatus.PRE_PROCESSING
        logging.info("--------------------Start Preprocessing SRT class--------------------")
        if self.pre_setting["sentence_form"]:
            self.SRT_Script.form_whole_sentence()
        if self.pre_setting["spell_check"]:
            self.SRT_Script.spell_check_term()
        if self.pre_setting["term_correct"]:
            self.SRT_Script.correct_with_force_term()
        processed_srt_path_src = str(Path(self.task_local_dir) / f'{self.task_id}_processed.srt')
        self.SRT_Script.write_srt_file_src(processed_srt_path_src)

        if self.output_type["subtitle"] == "ass":
            logging.info("write English .srt file to .ass")
            assSub_src = srt2ass(processed_srt_path_src, "default", "No", "Modest")
            logging.info('ASS subtitle saved as: ' + assSub_src)
        self.script_input = self.SRT_Script.get_source_only()

    def update_translation_progress(self, new_progress):
        """Record translation progress while the task is in TRANSLATING state."""
        # BUG FIX: the original tested/assigned self.progress, an attribute
        # that was never defined anywhere (AttributeError when called).
        if self.status == TaskStatus.TRANSLATING:
            self.translation_progress = new_progress

    # Module 3: perform srt translation
    def translation(self):
        """Translate the SRT script with the configured LLM."""
        logging.info("---------------------Start Translation--------------------")
        prompt = prompt_selector(self.source_lang, self.target_lang, self.field)
        get_translation(self.SRT_Script, self.translation_model, self.task_id, prompt, self.translation_setting['chunk_size'])

    # Module 4: perform srt post process steps
    def postprocess(self):
        """Run the configured postprocessing passes on the translated script."""
        self.status = TaskStatus.POST_PROCESSING

        logging.info("---------------------Start Post-processing SRT class---------------------")
        if self.post_setting["check_len_and_split"]:
            self.SRT_Script.check_len_and_split()
        if self.post_setting["remove_trans_punctuation"]:
            self.SRT_Script.remove_trans_punctuation()
        logging.info("---------------------Post-processing SRT class finished---------------------")

    # Module 5: output module
    def output_render(self):
        """
        Write the requested outputs (srt/ass subtitle, optional bilingual
        subtitle, optional burned-in mp4) and return the final artifact path.
        """
        self.status = TaskStatus.OUTPUT_MODULE
        video_out = self.output_type["video"]
        subtitle_type = self.output_type["subtitle"]
        is_bilingual = self.output_type["bilingual"]

        results_dir = f"{self.task_local_dir}/results"
        # Robustness: make sure the results directory exists before writing.
        Path(results_dir).mkdir(parents=True, exist_ok=True)

        subtitle_path = f"{results_dir}/{self.task_id}_{self.target_lang}.srt"
        self.SRT_Script.write_srt_file_translate(subtitle_path)
        if is_bilingual:
            subtitle_path = f"{results_dir}/{self.task_id}_{self.source_lang}_{self.target_lang}.srt"
            self.SRT_Script.write_srt_file_bilingual(subtitle_path)

        if subtitle_type == "ass":
            logging.info("write .srt file to .ass")
            subtitle_path = srt2ass(subtitle_path, "default", "No", "Modest")
            logging.info('ASS subtitle saved as: ' + subtitle_path)

        final_res = subtitle_path

        # encode to .mp4 video file
        if video_out and self.video_path is not None:
            logging.info("encoding video file")
            logging.info(f'ffmpeg comand: \nffmpeg -i {self.video_path} -vf "subtitles={subtitle_path}" {results_dir}/{self.task_id}.mp4')
            subprocess.run(
                ["ffmpeg",
                 "-i", self.video_path,
                 "-vf", f"subtitles={subtitle_path}",
                 f"{results_dir}/{self.task_id}.mp4"])
            final_res = f"{results_dir}/{self.task_id}.mp4"

        self.t_e = time()
        logging.info(
            "Pipeline finished, time duration:{}".format(strftime("%H:%M:%S", gmtime(self.t_e - self.t_s))))
        return final_res

    def run_pipeline(self):
        """Execute the full ASR -> translate -> render pipeline in order."""
        self.get_srt_class()
        self.preprocess()
        self.translation()
        self.postprocess()
        self.result = self.output_render()
251
+
252
class YoutubeTask(Task):
    """Task whose media is downloaded from a YouTube URL."""

    def __init__(self, task_id, task_local_dir, task_cfg, youtube_url):
        super().__init__(task_id, task_local_dir, task_cfg)
        self.youtube_url = youtube_url

    def run(self):
        """Download video + audio, then hand off to the shared pipeline."""
        yt = YouTube(self.youtube_url)
        mp4_name = f"task_{self.task_id}.mp4"
        mp3_name = f"task_{self.task_id}.mp3"

        # Highest-resolution progressive (audio+video muxed) mp4 stream.
        video_stream = yt.streams.filter(progressive=True, file_extension='mp4').order_by('resolution').desc().first()
        if not video_stream:
            raise FileNotFoundError(f" Video stream not found for link {self.youtube_url}")
        video_stream.download(str(self.task_local_dir), filename=mp4_name)
        logging.info(f'Video Name: {video_stream.default_filename}')

        audio_stream = yt.streams.filter(only_audio=True).first()
        if audio_stream:
            audio_stream.download(str(self.task_local_dir), filename=mp3_name)
        else:
            # Fall back to ripping the audio track out of the downloaded video.
            logging.info(" download audio failed, using ffmpeg to extract audio")
            subprocess.run(
                ['ffmpeg', '-i', self.task_local_dir.joinpath(mp4_name), '-f', 'mp3',
                 '-ab', '192000', '-vn', self.task_local_dir.joinpath(mp3_name)])
            logging.info("audio extraction finished")

        self.video_path = self.task_local_dir.joinpath(mp4_name)
        self.audio_path = self.task_local_dir.joinpath(mp3_name)

        logging.info(f" Video File Dir: {self.video_path}")
        logging.info(f" Audio File Dir: {self.audio_path}")
        logging.info(" Data Prep Complete. Start pipeline")

        super().run_pipeline()
285
+
286
class AudioTask(Task):
    """Task created directly from an existing audio file; no video output."""

    def __init__(self, task_id, task_local_dir, task_cfg, audio_path):
        super().__init__(task_id, task_local_dir, task_cfg)
        # TODO: check audio format
        self.video_path = None
        self.audio_path = audio_path

    def run(self):
        """No media preparation needed; go straight to the shared pipeline."""
        logging.info(f"Video File Dir: {self.video_path}")
        logging.info(f"Audio File Dir: {self.audio_path}")
        logging.info("Data Prep Complete. Start pipeline")
        super().run_pipeline()
298
+
299
class VideoTask(Task):
    """Task created from a local video file; audio is extracted via ffmpeg."""

    def __init__(self, task_id, task_local_dir, task_cfg, video_path):
        super().__init__(task_id, task_local_dir, task_cfg)
        # TODO: check video format {.mp4}
        new_video_path = f"{task_local_dir}/task_{self.task_id}.mp4"
        print(new_video_path)
        logging.info(f"Copy video file to: {new_video_path}")
        shutil.copyfile(video_path, new_video_path)
        self.video_path = new_video_path

    def run(self):
        """Extract the audio track, then hand off to the shared pipeline."""
        logging.info("using ffmpeg to extract audio")
        extracted_audio = self.task_local_dir.joinpath(f"task_{self.task_id}.mp3")
        subprocess.run(
            ['ffmpeg', '-i', self.video_path, '-f', 'mp3',
             '-ab', '192000', '-vn', extracted_audio])
        logging.info("audio extraction finished")

        self.audio_path = extracted_audio
        logging.info(f" Video File Dir: {self.video_path}")
        logging.info(f" Audio File Dir: {self.audio_path}")
        logging.info("Data Prep Complete. Start pipeline")
        super().run_pipeline()
src/translators/LLM_task.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import openai
3
+
4
+
5
def LLM_task(model_name, input, task, temp=0.15):
    """
    Translate *input* with the requested LLM.

    :param model_name: Name of the chat model ("gpt-3.5-turbo" or "gpt-4").
    :param input: Sentence(s) to translate.
    :param task: System prompt describing the translation task.
    :param temp: Sampling temperature.
    """
    # Guard clause: only the OpenAI chat models are wired up so far.
    if model_name not in ("gpt-3.5-turbo", "gpt-4"):
        raise NotImplementedError
    response = openai.ChatCompletion.create(
        model=model_name,
        messages=[
            {"role": "system", "content": task},
            {"role": "user", "content": input},
        ],
        temperature=temp,
    )
    return response['choices'][0]['message']['content'].strip()
src/translators/__init__.py ADDED
File without changes
src/translators/translation.py ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from os import getenv
2
+ import logging
3
+ from time import sleep
4
+ from tqdm import tqdm
5
+ from src.srt_util.srt import split_script
6
+ from .LLM_task import LLM_task
7
+
8
def get_translation(srt, model, video_name, prompt, chunk_size=1000):
    """Chunk the SRT source text and translate each chunk into *srt* in place."""
    script_arr, range_arr = split_script(srt.get_source_only(), chunk_size)
    translate(srt, script_arr, range_arr, model, video_name, task=prompt)
12
+
13
def check_translation(sentence, translation):
    """
    Detect the merged-sentence failure mode of LLM translation: the
    translated chunk must contain exactly as many blank-line-separated
    segments as the source chunk.

    :param sentence: Source chunk (segments separated by blank lines).
    :param translation: Model output for the same chunk.
    :return: True when the segment counts match, False otherwise.
    """
    # Idiom: return the comparison directly instead of if/else True/False.
    # (count+1 == count+1 is equivalent to comparing the raw counts.)
    return sentence.count('\n\n') == translation.count('\n\n')
24
+
25
+ # TODO{david}: prompts selector
26
def prompt_selector(src_lang, tgt_lang, domain):
    """
    Build the system prompt for a translation task.

    :param src_lang: Source language code (e.g. "EN").
    :param tgt_lang: Target language code (e.g. "ZH").
    :param domain: Subject domain of the video (e.g. "General").
    :return: Prompt string for the LLM.
    """
    language_map = {
        "EN": "English",
        "ZH": "Chinese",
    }
    # Robustness: the original raised KeyError for any language outside the
    # map; fall back to the raw code so new language pairs still work.
    src_lang = language_map.get(src_lang, src_lang)
    tgt_lang = language_map.get(tgt_lang, tgt_lang)
    prompt = f"""
    you are a translation assistant, your job is to translate a video in domain of {domain} from {src_lang} to {tgt_lang},
    you will be provided with a segement in {src_lang} parsed by line, where your translation text should keep the original
    meaning and the number of lines.
    """
    return prompt
39
+
40
def translate(srt, script_arr, range_arr, model_name, video_name=None, attempts_count=5, task=None, temp=0.15):
    """
    Translate the given script chunks with the LLM and write each result
    back into the SRT object.

    If the merged-sentence check fails *attempts_count* times for a chunk,
    the chunk is split into single sentences and each is translated
    individually. Any exception from the LLM call triggers a 30s wait and
    a retry of the whole chunk.

    :param srt: Subtitle object; receives results via set_translation().
    :param script_arr: Chunk texts to translate.
    :param range_arr: (start, end) sentence indices for each chunk.
    :param model_name: Name of the translation model to be used.
    :param video_name: The name of the video (passed through for logging).
    :param attempts_count: Retry budget per chunk for unmatched sentence counts.
    :param task: System prompt; a Chinese default is used when None.
    :param temp: Model temperature.
    """
    # BUG FIX: the original tested the builtin `input` (never None); validate
    # the actual argument instead.
    if script_arr is None:
        raise Exception("Warning! No Input have passed to LLM!")
    if task is None:
        task = "你是一个翻译助理,你的任务是翻译视频,你会被提供一个按行分割的英文段落,你需要在保证句意和行数的情况下输出翻译后的文本。"
    logging.info(f"translation prompt: {task}")
    previous_length = 0
    for sentence, range_ in tqdm(zip(script_arr, range_arr)):
        # update the range based on previous length
        range_ = (range_[0] + previous_length, range_[1] + previous_length)
        # using chatgpt model
        print(f"now translating sentences {range_}")
        logging.info(f"now translating sentences {range_}")
        flag = True
        while flag:
            flag = False
            try:
                translation = LLM_task(model_name, sentence, task, temp)
                # Detect merge-sentence issue and retry a bounded number of times.
                # BUG FIX: use a per-chunk retry budget — the original consumed
                # attempts_count globally, so later chunks could get 0 retries.
                remaining = attempts_count
                while not check_translation(sentence, translation) and remaining > 0:
                    translation = LLM_task(model_name, sentence, task, temp)
                    remaining -= 1

                # If failure persists, translate sentence-by-sentence.
                if remaining == 0:
                    # BUG FIX: lazy %-args — the original passed range_ as a
                    # positional logging arg with no %s in the message.
                    logging.info("merge sentence issue found for range %s", range_)
                    # BUG FIX: the original re-translated the entire chunk for
                    # every single sentence instead of the sentence itself.
                    translation = "\n\n".join(
                        LLM_task(model_name, single_sentence, task, temp)
                        for single_sentence in sentence.split("\n\n"))
                    logging.info("solved by individually translation!")

            except Exception as e:
                # BUG FIX: same lazy-%s issue as above for the exception arg.
                logging.debug("An error has occurred during translation: %s", e)
                print("An error has occurred during translation:", e)
                print("Retrying... the script will continue after 30 seconds.")
                sleep(30)
                flag = True

        srt.set_translation(translation, range_, model_name, video_name)
src/web/api_specs.yaml ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ openapi: 3.0.3
2
+ info:
3
+ title: Pigeon AI
4
+ description: Pigeon AI
5
+ version: 1.0.0
6
+ servers:
7
+ - url: 'https'
8
+ paths:
9
+ /api/task:
10
+ post:
11
+ summary: Create a task
12
+ operationId: createTask
13
+ requestBody:
14
+ content:
15
+ application/json:
16
+ schema:
17
+ $ref: '#/components/schemas/youtubeLink'
18
+ responses:
19
+ '200':
20
+ description: OK
21
+ content:
22
+ application/json:
23
+ schema:
24
+ $ref: '#/components/schemas/task'
25
+ /api/task/{taskId}/status:
26
+ get:
27
+ summary: Get task status
28
+ operationId: getTask
29
+ parameters:
30
+ - name: taskId
31
+ in: path
32
+ required: true
33
+ description: task id
34
+ schema:
35
+ type: string
36
+ responses:
37
+ '200':
38
+ description: OK
39
+ content:
40
+ application/json:
41
+ schema:
42
+ $ref: '#/components/schemas/taskStatus'
43
+ '404':
44
+ description: Not Found
45
+ content:
46
+ application/json:
47
+ schema:
48
+ $ref: '#/components/schemas/error'
49
+
50
+ components:
51
+ schemas:
52
+ youtubeLink:
53
+ type: object
54
+ properties:
55
+ youtubeLink:
56
+ type: string
57
+ description: youtube link
58
+ example: https://www.youtube.com/watch?v=5qap5aO4i9A
59
+ task:
60
+ type: object
61
+ properties:
62
+ taskId:
63
+ type: string
64
+ description: task id generated by uuid
65
+ example: 7a765280-1a72-47e4-8747-8a38cdbaca91
66
+ taskStatus:
67
+ type: object
68
+ properties:
69
+ status:
70
+ type: string
71
+ description: task status
72
+ example: PROCESSING
73
+ error:
74
+ type: object
75
+ properties:
76
+ error:
77
+ type: string
78
+ description: error message
79
+ example: 'Invalid youtube link'
src/web/web.py ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import yaml
2
+ from flask import Flask, request, jsonify
3
+ from concurrent.futures import ThreadPoolExecutor
4
+ from src.task import Task
5
+ from uuid import uuid4
6
+
7
+ app = Flask(__name__)
8
+
9
+ # Global thread pool
10
+ executor = ThreadPoolExecutor(max_workers=4) # Adjust max_workers as per your requirement
11
+
12
+ # thread safe task map to store task status
13
+ task_map = {}
14
+
15
@app.route('/api/task', methods=['POST'])
def create_task_youtube():
    """Create a YouTube translation task and schedule it on the thread pool."""
    global task_map
    data = request.get_json()
    if not data or 'youtubeLink' not in data:
        return jsonify({'error': 'YouTube link not provided'}), 400
    youtube_link = data['youtubeLink']
    from pathlib import Path  # local import: keeps the module import block untouched
    launch_config = yaml.load(open("./configs/local_launch.yaml"), Loader=yaml.Loader)
    # BUG FIX: Task.fromYoutubeLink(youtube_url, task_id, task_dir, task_cfg)
    # takes four arguments; the original passed launch_config in the task_dir
    # slot and omitted the per-task configuration entirely.
    task_cfg = yaml.load(open("./configs/task_config.yaml"), Loader=yaml.Loader)
    task_id = str(uuid4())
    task_dir = Path(launch_config['local_dump']) / f"task_{task_id}"
    task_dir.mkdir(parents=True, exist_ok=True)
    task = Task.fromYoutubeLink(youtube_link, task_id, task_dir, task_cfg)
    task_map[task_id] = task
    # Submit task to thread pool
    executor.submit(task.run)

    return jsonify({'taskId': task.task_id})
30
+
31
@app.route('/api/task/<taskId>/status', methods=['GET'])
def get_task_status(taskId):
    """Return the current pipeline status for a previously created task."""
    global task_map
    task = task_map.get(taskId)
    if task is None:
        return jsonify({'error': 'Task not found'}), 404
    return jsonify({'status': task.status})
37
+
38
# Run the Flask development server when this module is executed directly.
# debug=True is for local development only; use a WSGI server in production.
if __name__ == '__main__':
    app.run(debug=True)
tests/test_remove_punc.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sys
2
+ sys.path.append('./src')
3
+ from srt_util.srt import SrtScript, SrtSegment
4
+
5
+ zh_test1 = "再次,如果你对一些福利感兴趣,你也可以。"
6
+ zh_en_test1 = "GG。Classic在我今年解说的最奇葩的系列赛中获得了胜利。"
7
+
8
def form_srt_class(src_lang, tgt_lang, source_text="", translation="", duration="00:00:00,740 --> 00:00:08,779"):
    """Build a one-segment SrtScript fixture for the punctuation tests."""
    seg = [0, duration, source_text, translation, ""]
    return SrtScript(src_lang, tgt_lang, [seg])
11
+
12
def test_zh():
    # Chinese punctuation in the translation should be replaced by spaces.
    script = form_srt_class(src_lang="EN", tgt_lang="ZH", translation=zh_test1)
    script.remove_trans_punctuation()
    assert script.segments[0].translation == "再次 如果你对一些福利感兴趣 你也可以 "
16
+
17
def test_zh_en():
    # Mixed Chinese/English text: punctuation stripped, ASCII words kept intact.
    script = form_srt_class(src_lang="EN", tgt_lang="ZH", translation=zh_en_test1)
    script.remove_trans_punctuation()
    assert script.segments[0].translation == "GG Classic在我今年解说的最奇葩的系列赛中获得了胜利 "
21
+