Spaces:

StarPigeon
/

ViDove

Sleeping

App Files Files Community

JiaenLiu committed on Nov 12, 2023

Commit

f144427

2 Parent(s): 04ef04e 54a5e67

Add new features and fix bugs

Browse files

Former-commit-id: 24553560a2196217ae598510434708733b3f1888

Files changed (6) hide show

configs/task_config.yaml +8 -4
entries/run.py +22 -19
src/srt_util/srt.py +24 -0
src/task.py +81 -35
src/translation/LLM_task.py +26 -0
src/translation/translation.py +88 -0

configs/task_config.yaml CHANGED Viewed

@@ -1,6 +1,10 @@
 # configuration for each task
-model: "gpt-4"
-local_dump: ./local_dump
-output_type: srt
-target_lang: CN
 field: SC2

 # configuration for each task
+model: gpt-4
+# output type that user receive
+output_type:
+  subtitle: srt
+  video: False
+  bilingal: False
+source_lang: EN
+target_lang: ZH
 field: SC2

entries/run.py CHANGED Viewed

@@ -8,6 +8,7 @@ import os
 from pathlib import Path
 from datetime import datetime
 import shutil
 def parse_args():
     parser = argparse.ArgumentParser()
@@ -17,56 +18,58 @@ def parse_args():
     parser.add_argument("--srt_file", help="srt file input path here", default=None, type=str, required=False)
     parser.add_argument("--continue", help="task_id that need to continue", default=None, type=str, required=False) # need implement
     parser.add_argument("--launch_cfg", help="launch config path", default='./configs/local_launch.yaml', type=str, required=False)
     args = parser.parse_args()
     return args
 if __name__ == "__main__":
     args = parse_args()
     launch_cfg = load(open(args.launch_cfg), Loader=Loader)
     # initialize dir
     local_dir = Path(launch_cfg['local_dump'])
-    # initialize task queue
     if not local_dir.exists():
         local_dir.mkdir(parents=False, exist_ok=False)
-        f = open(local_dir.joinpath("task_queue.yaml"), "w")
-        f.write("Task Queue: []\n")
-        f.close()
     # get task id
-    tasks_queue = load(open(local_dir.joinpath("task_queue.yaml")), Loader = Loader)
-    task_list = tasks_queue['Task Queue']
-    task_id = len(task_list)
     # create locak dir for the task
     task_dir = local_dir.joinpath(f"task_{task_id}")
     task_dir.mkdir(parents=False, exist_ok=False)
     task_dir.joinpath("results").mkdir(parents=False, exist_ok=False)
-    task_dir.joinpath("logs").mkdir(parents=False, exist_ok=False)
-    f = open(task_dir.joinpath("task_info.yaml"), "w")
-    f.write(f"task_id: {task_id}")
-    f.close()
     logging.basicConfig(level=logging.INFO, handlers=[
         logging.FileHandler(
-            "{}/{}_{}.log".format(task_dir.joinpath("logs"), f"task_{task_id}", datetime.now().strftime("%m%d%Y_%H%M%S")),
             'w', encoding='utf-8')])
-    # task create
     if args.link is not None:
         try:
-            task = Task.fromYoutubeLink(args.link, task_id, launch_cfg)
         except:
             shutil.rmtree(task_dir)
             raise RuntimeError("failed to create task from youtube link")
     # add task to the status queue
-    task_list.append({"id": task_id, "status": "created", "resource_status:": "local"})
-    stream = open(local_dir.joinpath("task_queue.yaml"), "w")
-    dump(tasks_queue, stream)
-    task.run_pipeline()

 from pathlib import Path
 from datetime import datetime
 import shutil
+from uuid import uuid4
 def parse_args():
     parser = argparse.ArgumentParser()
     parser.add_argument("--srt_file", help="srt file input path here", default=None, type=str, required=False)
     parser.add_argument("--continue", help="task_id that need to continue", default=None, type=str, required=False) # need implement
     parser.add_argument("--launch_cfg", help="launch config path", default='./configs/local_launch.yaml', type=str, required=False)
+    parser.add_argument("--task_cfg", help="task config path", default='./configs/task_config.yaml', type=str, required=False)
     args = parser.parse_args()
     return args
 if __name__ == "__main__":
+    # read args and configs
     args = parse_args()
     launch_cfg = load(open(args.launch_cfg), Loader=Loader)
+    task_cfg = load(open(args.task_cfg), Loader=Loader)
     # initialize dir
     local_dir = Path(launch_cfg['local_dump'])
     if not local_dir.exists():
         local_dir.mkdir(parents=False, exist_ok=False)
     # get task id
+    task_id = str(uuid4())
     # create locak dir for the task
     task_dir = local_dir.joinpath(f"task_{task_id}")
     task_dir.mkdir(parents=False, exist_ok=False)
     task_dir.joinpath("results").mkdir(parents=False, exist_ok=False)
+    # logging
     logging.basicConfig(level=logging.INFO, handlers=[
         logging.FileHandler(
+            "{}/{}_{}.log".format(task_dir, f"task_{task_id}", datetime.now().strftime("%m%d%Y_%H%M%S")),
             'w', encoding='utf-8')])
+    # Task create
     if args.link is not None:
         try:
+            task = Task.fromYoutubeLink(args.link, task_id, task_dir, task_cfg)
+        except:
+            shutil.rmtree(task_dir)
+            raise RuntimeError("failed to create task from youtube link")
+    elif args.video_file is not None:
+        try:
+            task = Task.fromVideoFile(args.video_file, task_id, task_dir, task_cfg)
+        except:
+            shutil.rmtree(task_dir)
+            raise RuntimeError("failed to create task from youtube link")
+    elif args.audio_file is not None:
+        try:
+            task = Task.fromVideoFile(args.audio_file, task_id, task_dir, task_cfg)
         except:
             shutil.rmtree(task_dir)
             raise RuntimeError("failed to create task from youtube link")
     # add task to the status queue
+    task.run()

src/srt_util/srt.py CHANGED Viewed

@@ -532,3 +532,27 @@ class SrtScript(object):
                 f.write(f'{i + idx}\n')
                 f.write(seg.get_bilingual_str())
         pass

                 f.write(f'{i + idx}\n')
                 f.write(seg.get_bilingual_str())
         pass
def split_script(script_in, chunk_size=1000):
    """Group the sentences of a script into chunks of bounded length.

    The script is split on blank-line separators (``'\\n\\n'``); segments that
    contain only whitespace are ignored. Consecutive sentences are packed into
    a chunk while the chunk stays within ``chunk_size`` characters (each
    sentence is counted together with its two-character separator, using the
    same fit test as before: ``current_length + len(sentence) + 1``).

    :param script_in: Script text whose sentences are separated by ``'\\n\\n'``.
    :param chunk_size: Soft upper bound on the length of one chunk. A single
        sentence longer than this still gets a chunk of its own.
    :return: ``(script_arr, range_arr)`` where ``script_arr[i]`` is the text of
        chunk ``i`` and ``range_arr[i]`` is the inclusive, 1-based
        ``(first, last)`` sentence index range that chunk covers.
    """
    sentences = [part for part in script_in.split('\n\n') if part.strip()]
    script_arr = []
    range_arr = []
    chunk_parts = []
    chunk_len = 0
    start = 1
    for index, sentence in enumerate(sentences, start=1):
        # Close the current chunk only if it already holds something; this
        # avoids emitting an empty chunk when the very first sentence is
        # longer than chunk_size.
        if chunk_parts and chunk_len + len(sentence) + 1 > chunk_size:
            script_arr.append('\n\n'.join(chunk_parts).strip())
            range_arr.append((start, index - 1))
            chunk_parts = []
            chunk_len = 0
            start = index
        chunk_parts.append(sentence)
        chunk_len += len(sentence) + 2  # sentence plus its '\n\n' separator
    if chunk_parts:
        script_arr.append('\n\n'.join(chunk_parts).strip())
        # The last range ends at the real sentence count, whether or not the
        # input ended with a trailing separator.
        range_arr.append((start, len(sentences)))
    return script_arr, range_arr

src/task.py CHANGED Viewed

@@ -11,6 +11,7 @@ import subprocess
 from src.srt_util.srt import SrtScript
 from src.srt_util.srt2ass import srt2ass
 from time import time, strftime, gmtime, sleep
 import torch
 import stable_whisper
@@ -66,39 +67,50 @@ class Task:
         with self.__status_lock:
             self.__status = new_status
-    def __init__(self, task_id, task_local_dir, launch_info):
         self.__status_lock = threading.Lock()
         self.__status = TaskStatus.CREATED
         openai.api_key = getenv("OPENAI_API_KEY")
-        self.launch_info = launch_info
         self.task_local_dir = task_local_dir
-        self.model = launch_info["model"]
         self.gpu_status = 0
-        self.output_type = launch_info["output_type"]
         self.task_id = task_id
-        self.progress = NotImplemented
         self.SRT_Script = None
         self.result = None
         self.s_t = None
         self.t_e = None
     @staticmethod
-    def fromYoutubeLink(youtube_url, task_id, launch_info):
         # convert to audio
-        logging.info("Task Creation method: Youtube Link")
-        local_dump = Path(launch_info['local_dump'])  # should get from launch config
-        return YoutubeTask(task_id, local_dump.joinpath(f"task_{task_id}"), launch_info, youtube_url)
     @staticmethod
-    def fromAudioFile():
-        #
-        return Task(...)
     @staticmethod
-    def fromVideoFile():
-        # convert to audio
-        return Task(...)
     # Module 1 ASR: audio --> SRT_script
     def get_srt_class(self, whisper_model='tiny', method="stable"):
@@ -141,17 +153,18 @@ class Task:
         time.sleep(5)
         pass
-    # Module 2: SRT preprocess: perform preprocess steps
     def preprocess(self):
         self.status = TaskStatus.PRE_PROCESSING
         logging.info("--------------------Start Preprocessing SRT class--------------------")
         self.SRT_Script.form_whole_sentence()
         # self.SRT_Script.spell_check_term()
         self.SRT_Script.correct_with_force_term()
-        processed_srt_path_en = str(Path(self.srt_path).with_suffix('')) + '_processed.srt'
         self.SRT_Script.write_srt_file_src(processed_srt_path_en)
-        if self.output_type == "ass":
             logging.info("write English .srt file to .ass")
             assSub_en = srt2ass(processed_srt_path_en)
             logging.info('ASS subtitle saved as: ' + assSub_en)
@@ -165,10 +178,9 @@ class Task:
     # Module 3: perform srt translation
     def translation(self):
         time.sleep(5)
         pass
     # Module 4: perform srt post process steps
@@ -210,7 +222,6 @@ class Task:
     def output_render(self):
         self.status = TaskStatus.OUTPUT_MODULE
         return "TODO"
-        pass
     def run_pipeline(self):
         self.get_srt_class()
@@ -220,33 +231,68 @@ class Task:
         self.result = self.output_render()
 class YoutubeTask(Task):
-    def __init__(self, task_id, task_local_dir, launch_info, youtube_url):
-        super().__init__(task_id, task_local_dir, launch_info)
         self.youtube_url = youtube_url
     def run(self):
         yt = YouTube(self.youtube_url)
-        local_dump = self.task_local_dir
         video = yt.streams.filter(progressive=True, file_extension='mp4').order_by('resolution').desc().first()
         if video:
-            video.download(str(local_dump.joinpath(f"task_{self.task_id}")), filename=f"task_{self.task_id}.mp4")
-            logging.info(f'Video download completed to {local_dump.joinpath(f"task_{self.task_id}")}!')
         else:
-            raise FileNotFoundError(f"Video stream not found for link {self.youtube_url}")
         audio = yt.streams.filter(only_audio=True).first()
         if audio:
-            audio.download(str(local_dump.joinpath(f"task_{self.task_id}")), filename=f"task_{self.task_id}.mp3")
-            logging.info(f'Audio download completed to {local_dump.joinpath(f"task_{self.task_id}")}!')
         else:
-            logging.info("download audio failed, using ffmpeg to extract audio")
             subprocess.run(
-                ['ffmpeg', '-i', local_dump.joinpath(f"task_{self.task_id}").joinpath(f"task_{self.task_id}.mp4"), '-f', 'mp3',
-                 '-ab', '192000', '-vn', local_dump.joinpath(f"task_{self.task_id}").joinpath(f"task_{self.task_id}.mp3")])
             logging.info("audio extraction finished")
-        logging.info("Task Creation Complete.")
-        logging.info("Task Creation method: Youtube Link")
         super().run_pipeline()

 from src.srt_util.srt import SrtScript
 from src.srt_util.srt2ass import srt2ass
 from time import time, strftime, gmtime, sleep
+from translation.translation import get_translation, translate
 import torch
 import stable_whisper
         with self.__status_lock:
             self.__status = new_status
+    def __init__(self, task_id, task_local_dir, task_cfg):
         self.__status_lock = threading.Lock()
         self.__status = TaskStatus.CREATED
         openai.api_key = getenv("OPENAI_API_KEY")
+        self.launch_info = task_cfg # do not use, just for fallback
         self.task_local_dir = task_local_dir
+        self.model = task_cfg["model"]
         self.gpu_status = 0
+        self.output_type = task_cfg["output_type"]
+        self.target_lang = task_cfg["target_lang"]
+        self.source_lang = task_cfg["source_lang"]
+        self.field = task_cfg["field"]
         self.task_id = task_id
+        self.audio_path = None
         self.SRT_Script = None
         self.result = None
         self.s_t = None
         self.t_e = None
+        print(f" Task ID: {self.task_id}")
+        logging.info(f" Task ID: {self.task_id}")
+        logging.info(f" {self.source_lang} -> {self.target_lang} task in {self.field}")
+        logging.info(f" Model: \t\t\t{self.model}")
+        logging.info(f" subtitle_type: \t\t{self.output_type['subtitle']}")
+        logging.info(f" video_ouput: \t\t{self.output_type['video']}")
+        logging.info(f" bilingal_ouput: \t{self.output_type['bilingal']}")
     @staticmethod
+    def fromYoutubeLink(youtube_url, task_id, task_dir, task_cfg):
         # convert to audio
+        logging.info(" Task Creation method: Youtube Link")
+        return YoutubeTask(task_id, task_dir, task_cfg, youtube_url)
     @staticmethod
+    def fromAudioFile(audio_path, task_id, task_dir, task_cfg):
+        # get audio path
+        logging.info(" Task Creation method: Audio File")
+        return AudioTask(task_id, task_dir, task_cfg, audio_path)
     @staticmethod
+    def fromVideoFile(video_path, task_id, task_dir, task_cfg):
+        # get audio path
+        logging.info(" Task Creation method: Video File")
+        return VideoTask(task_id, task_dir, task_cfg, video_path)
     # Module 1 ASR: audio --> SRT_script
     def get_srt_class(self, whisper_model='tiny', method="stable"):
         time.sleep(5)
         pass
+    # Module 2: SRT preprocess: perform preprocess steps
+    # TODO: multi-lang and multi-field support according to task_cfg
     def preprocess(self):
         self.status = TaskStatus.PRE_PROCESSING
         logging.info("--------------------Start Preprocessing SRT class--------------------")
         self.SRT_Script.form_whole_sentence()
         # self.SRT_Script.spell_check_term()
         self.SRT_Script.correct_with_force_term()
+        processed_srt_path_en = str(Path(self.task_local_dir).with_suffix('')) + '_processed.srt'
         self.SRT_Script.write_srt_file_src(processed_srt_path_en)
+        if self.output_type["subtitle"] == "ass":
             logging.info("write English .srt file to .ass")
             assSub_en = srt2ass(processed_srt_path_en)
             logging.info('ASS subtitle saved as: ' + assSub_en)
     # Module 3: perform srt translation
     def translation(self):
+        logging.info("---------------------Start Translation--------------------")
+        get_translation(self.srt,self.model, self.video_name, self.video_link)
         time.sleep(5)
         pass
     # Module 4: perform srt post process steps
     def output_render(self):
         self.status = TaskStatus.OUTPUT_MODULE
         return "TODO"
     def run_pipeline(self):
         self.get_srt_class()
         self.result = self.output_render()
 class YoutubeTask(Task):
+    def __init__(self, task_id, task_local_dir, task_cfg, youtube_url):
+        super().__init__(task_id, task_local_dir, task_cfg)
         self.youtube_url = youtube_url
     def run(self):
         yt = YouTube(self.youtube_url)
         video = yt.streams.filter(progressive=True, file_extension='mp4').order_by('resolution').desc().first()
         if video:
+            video.download(str(self.task_local_dir), filename=f"task_{self.task_id}.mp4")
+            logging.info(f'Video Name: {video.default_filename}')
         else:
+            raise FileNotFoundError(f" Video stream not found for link {self.youtube_url}")
         audio = yt.streams.filter(only_audio=True).first()
         if audio:
+            audio.download(str(self.task_local_dir), filename=f"task_{self.task_id}.mp3")
+            # logging.info(f'Audio download completed to {self.task_local_dir}!')
         else:
+            logging.info(" download audio failed, using ffmpeg to extract audio")
             subprocess.run(
+                ['ffmpeg', '-i', self.task_local_dir.joinpath(f"task_{self.task_id}.mp4"), '-f', 'mp3',
+                 '-ab', '192000', '-vn', self.task_local_dir.joinpath(f"task_{self.task_id}.mp3")])
             logging.info("audio extraction finished")
+        self.video_path = self.task_local_dir.joinpath(f"task_{self.task_id}.mp4")
+        self.audio_path = self.task_local_dir.joinpath(f"task_{self.task_id}.mp3")
+        logging.info(f" Video File Dir: {self.video_path}")
+        logging.info(f" Audio File Dir: {self.audio_path}")
+        logging.info(" Data Prep Complete. Start pipeline")
+        super().run_pipeline()
class AudioTask(Task):
    """Task that starts from an existing audio file; it has no video."""

    def __init__(self, task_id, task_local_dir, task_cfg, audio_path):
        """
        :param task_id: Identifier of the task.
        :param task_local_dir: Working directory of the task.
        :param task_cfg: Task configuration passed to ``Task.__init__``.
        :param audio_path: Path of the audio file to process.
        """
        super().__init__(task_id, task_local_dir, task_cfg)
        # TODO: check audio format
        self.video_path = None
        self.audio_path = audio_path

    def run(self):
        """Log the media locations and start the pipeline."""
        startup_messages = (
            f" Video File Dir: {self.video_path}",
            f" Audio File Dir: {self.audio_path}",
            "Data Prep Complete. Start pipeline",
        )
        for message in startup_messages:
            logging.info(message)
        super().run_pipeline()
class VideoTask(Task):
    """Task that starts from a local video file; audio is extracted with ffmpeg."""

    def __init__(self, task_id, task_local_dir, task_cfg, video_path):
        """
        :param task_id: Identifier of the task.
        :param task_local_dir: Working directory the extracted audio is saved to.
        :param task_cfg: Task configuration passed to ``Task.__init__``.
        :param video_path: Path of the video file to process.
        """
        super().__init__(task_id, task_local_dir, task_cfg)
        # TODO: check video format {.mp4}
        self.video_path = video_path

    def run(self):
        """Extract ``task_<id>.mp3`` from the video, then start the pipeline.

        :raises subprocess.CalledProcessError: If ffmpeg exits with a non-zero
            status.
        """
        audio_path = self.task_local_dir.joinpath(f"task_{self.task_id}.mp3")
        logging.info("using ffmpeg to extract audio")
        # check=True: a failed extraction must stop the task here instead of
        # letting the pipeline continue with a missing audio file.
        subprocess.run(
            ['ffmpeg', '-i', str(self.video_path), '-f', 'mp3',
             '-ab', '192000', '-vn', str(audio_path)],
            check=True)
        logging.info("audio extraction finished")
        self.audio_path = audio_path
        logging.info(f" Video File Dir: {self.video_path}")
        logging.info(f" Audio File Dir: {self.audio_path}")
        logging.info("Data Prep Complete. Start pipeline")
        super().run_pipeline()

src/translation/LLM_task.py ADDED Viewed

	@@ -0,0 +1,26 @@

+import openai
def LLM_task(model_name, input, task, temp = 0.15):
    """
    Send one request to the chosen LLM and return its stripped reply.

    :param model_name: Model to use; only "gpt-3.5-turbo" and "gpt-4" are handled.
    :param input: Text sent as the user message (the sentence to translate).
    :param task: Prompt sent as the system message.
    :param temp: Model temperature.
    :return: Content of the first choice in the response, stripped of
        surrounding whitespace.
    :raises NotImplementedError: For any other model name.
    """
    # Other LLM not implemented
    if model_name not in ("gpt-3.5-turbo", "gpt-4"):
        raise NotImplementedError

    chat_messages = [
        {"role": "system", "content": task},
        {"role": "user", "content": input},
    ]
    completion = openai.ChatCompletion.create(
        model=model_name,
        messages=chat_messages,
        temperature=temp,
    )
    first_choice = completion['choices'][0]
    return first_choice['message']['content'].strip()

src/translation/translation.py ADDED Viewed

	@@ -0,0 +1,88 @@

+from os import getenv
+import logging
+from time import sleep
+from tqdm import tqdm
+from src.srt_util.srt import split_script
+from LLM_task import LLM_task
def get_translation(srt,model,video_name,video_link):
    """
    Split the script into chunks and translate every chunk.

    NOTE(review): ``srt`` is handed to ``split_script`` (which calls
    ``.split('\\n\\n')`` on its argument) and to ``translate`` (which calls
    ``srt.set_translation(...)``). Confirm the object passed in supports both,
    or that the source text should be extracted from it first.

    :param srt: Subtitle object to translate.
    :param model: Name of the model forwarded to ``translate``.
    :param video_name: Name of the video, forwarded to ``translate``.
    :param video_link: Link of the video, forwarded to ``translate``.
    :return: None.
    """
    chunks, chunk_ranges = split_script(srt)
    translate(srt, chunks, chunk_ranges, model, video_name, video_link)
def check_translation(sentence, translation):
    """
    Detect the merged-sentence issue in a translation.

    Both texts are expected to separate sentences with a blank line; the
    translation passes when it contains as many separators as the source.

    :param sentence: Source text.
    :param translation: Translated text.
    :return: True when both texts hold the same number of sentences, else False.
    """
    separator = '\n\n'
    # Equal separator counts mean equal sentence counts (count + 1 each).
    return sentence.count(separator) == translation.count(separator)
def _translate_individually(model_name, sentence, task, temp):
    """Translate each sentence of a chunk on its own and rejoin with blank lines."""
    return "\n\n".join(
        LLM_task(model_name, single_sentence, task, temp)
        for single_sentence in sentence.split("\n\n")
    )


def _translate_chunk(model_name, sentence, range_, attempts_count, task, temp):
    """Translate one chunk, retrying while the sentence count does not match."""
    translation = LLM_task(model_name, sentence, task, temp)
    # detect merge sentence issue and retry up to attempts_count times
    for _ in range(attempts_count):
        if check_translation(sentence, translation):
            break
        translation = LLM_task(model_name, sentence, task, temp)
    # if the failure still happens, translate sentence by sentence
    if not check_translation(sentence, translation):
        logging.info("merge sentence issue found for range %s", range_)
        translation = _translate_individually(model_name, sentence, task, temp)
        logging.info("solved by individually translation!")
    return translation


def translate(srt, script_arr, range_arr, model_name, video_name, video_link, attempts_count=5, task=None, temp = 0.15):
    """
    Translate every chunk of the script with the LLM and store the result on ``srt``.

    Each chunk is sent to ``LLM_task``. When the translation does not contain
    the same number of sentences as the source (see ``check_translation``) the
    request is repeated up to ``attempts_count`` times for that chunk; if the
    mismatch remains, every sentence of the chunk is translated on its own.
    Any other error during translation is logged and the chunk is retried
    after 30 seconds, without limit.

    :param srt: Subtitle object; ``srt.set_translation(...)`` is called once per chunk.
    :param script_arr: A list of strings representing the original script chunks to be translated.
    :param range_arr: A list of (start, end) tuples giving the sentence range of each chunk.
    :param model_name: The name of the translation model to be used.
    :param video_name: The name of the video.
    :param video_link: The link to the video.
    :param attempts_count: Number of retries per chunk for unmatched sentence counts.
    :param task: Prompt; a default prompt is used when None.
    :param temp: Model temperature.
    :raises Exception: If ``script_arr`` or ``range_arr`` is None.
    :raises NotImplementedError: If ``LLM_task`` does not support ``model_name``.
    """
    if script_arr is None or range_arr is None:
        raise Exception("Warning! No Input have passed to LLM!")
    if task is None:
        task = "你是一个翻译助理，你的任务是翻译星际争霸视频，你会被提供一个按行分割的英文段落，你需要在保证句意和行数的情况下输出翻译后的文本。"

    for sentence, range_ in tqdm(zip(script_arr, range_arr)):
        range_ = (range_[0], range_[1])
        # using chatgpt model
        print(f"now translating sentences {range_}")
        while True:
            try:
                translation = _translate_chunk(
                    model_name, sentence, range_, attempts_count, task, temp)
                break
            except NotImplementedError:
                # An unsupported model can never succeed; retrying would loop forever.
                raise
            except Exception as e:
                logging.exception("An error has occurred during translation: %s", e)
                print("An error has occurred during translation:", e)
                print("Retrying... the script will continue after 30 seconds.")
                sleep(30)
        srt.set_translation(translation, range_, model_name, video_name, video_link)