JiaenLiu committed on
Commit
f144427
·
2 Parent(s): 04ef04e 54a5e67

Add new features and fix bugs

Browse files

Former-commit-id: 24553560a2196217ae598510434708733b3f1888

configs/task_config.yaml CHANGED
@@ -1,6 +1,10 @@
1
  # configuration for each task
2
- model: "gpt-4"
3
- local_dump: ./local_dump
4
- output_type: srt
5
- target_lang: CN
 
 
 
 
6
  field: SC2
 
1
  # configuration for each task
2
+ model: gpt-4
3
+ # output types that the user receives
4
+ output_type:
5
+ subtitle: srt
6
+ video: False
7
+ bilingal: False
8
+ source_lang: EN
9
+ target_lang: ZH
10
  field: SC2
entries/run.py CHANGED
@@ -8,6 +8,7 @@ import os
8
  from pathlib import Path
9
  from datetime import datetime
10
  import shutil
 
11
 
12
  def parse_args():
13
  parser = argparse.ArgumentParser()
@@ -17,56 +18,58 @@ def parse_args():
17
  parser.add_argument("--srt_file", help="srt file input path here", default=None, type=str, required=False)
18
  parser.add_argument("--continue", help="task_id that need to continue", default=None, type=str, required=False) # need implement
19
  parser.add_argument("--launch_cfg", help="launch config path", default='./configs/local_launch.yaml', type=str, required=False)
 
20
  args = parser.parse_args()
21
 
22
  return args
23
 
24
  if __name__ == "__main__":
 
25
  args = parse_args()
26
  launch_cfg = load(open(args.launch_cfg), Loader=Loader)
 
27
 
28
  # initialize dir
29
  local_dir = Path(launch_cfg['local_dump'])
30
-
31
- # initialize task queue
32
  if not local_dir.exists():
33
  local_dir.mkdir(parents=False, exist_ok=False)
34
- f = open(local_dir.joinpath("task_queue.yaml"), "w")
35
- f.write("Task Queue: []\n")
36
- f.close()
37
 
38
  # get task id
39
- tasks_queue = load(open(local_dir.joinpath("task_queue.yaml")), Loader = Loader)
40
- task_list = tasks_queue['Task Queue']
41
- task_id = len(task_list)
42
 
43
  # create local dir for the task
44
  task_dir = local_dir.joinpath(f"task_{task_id}")
45
  task_dir.mkdir(parents=False, exist_ok=False)
46
  task_dir.joinpath("results").mkdir(parents=False, exist_ok=False)
47
- task_dir.joinpath("logs").mkdir(parents=False, exist_ok=False)
48
- f = open(task_dir.joinpath("task_info.yaml"), "w")
49
- f.write(f"task_id: {task_id}")
50
- f.close()
51
 
 
52
  logging.basicConfig(level=logging.INFO, handlers=[
53
  logging.FileHandler(
54
- "{}/{}_{}.log".format(task_dir.joinpath("logs"), f"task_{task_id}", datetime.now().strftime("%m%d%Y_%H%M%S")),
55
  'w', encoding='utf-8')])
56
 
57
- # task create
58
  if args.link is not None:
59
  try:
60
- task = Task.fromYoutubeLink(args.link, task_id, launch_cfg)
 
 
 
 
 
 
 
 
 
 
 
 
61
  except:
62
  shutil.rmtree(task_dir)
63
  raise RuntimeError("failed to create task from youtube link")
64
 
65
  # add task to the status queue
66
- task_list.append({"id": task_id, "status": "created", "resource_status:": "local"})
67
- stream = open(local_dir.joinpath("task_queue.yaml"), "w")
68
- dump(tasks_queue, stream)
69
- task.run_pipeline()
70
 
71
 
72
 
 
8
  from pathlib import Path
9
  from datetime import datetime
10
  import shutil
11
+ from uuid import uuid4
12
 
13
  def parse_args():
14
  parser = argparse.ArgumentParser()
 
18
  parser.add_argument("--srt_file", help="srt file input path here", default=None, type=str, required=False)
19
  parser.add_argument("--continue", help="task_id that need to continue", default=None, type=str, required=False) # need implement
20
  parser.add_argument("--launch_cfg", help="launch config path", default='./configs/local_launch.yaml', type=str, required=False)
21
+ parser.add_argument("--task_cfg", help="task config path", default='./configs/task_config.yaml', type=str, required=False)
22
  args = parser.parse_args()
23
 
24
  return args
25
 
26
if __name__ == "__main__":
    # read args and configs; context managers make sure the config files are closed
    args = parse_args()
    with open(args.launch_cfg) as launch_file:
        launch_cfg = load(launch_file, Loader=Loader)
    with open(args.task_cfg) as task_file:
        task_cfg = load(task_file, Loader=Loader)

    # Pick the task factory for the first input source that was supplied.
    # This is decided before anything is created on disk, so a call without
    # any input fails cleanly instead of leaving an empty task directory
    # behind and crashing later on an undefined `task`.
    if args.link is not None:
        source, create_task, source_name = args.link, Task.fromYoutubeLink, "youtube link"
    elif args.video_file is not None:
        source, create_task, source_name = args.video_file, Task.fromVideoFile, "video file"
    elif args.audio_file is not None:
        # audio input must go through the audio factory, not the video one
        source, create_task, source_name = args.audio_file, Task.fromAudioFile, "audio file"
    else:
        raise SystemExit("no input given: provide a youtube link, a video file or an audio file")

    # initialize dir
    local_dir = Path(launch_cfg['local_dump'])
    if not local_dir.exists():
        local_dir.mkdir(parents=False, exist_ok=False)

    # get task id
    task_id = str(uuid4())

    # create local dir for the task
    task_dir = local_dir.joinpath(f"task_{task_id}")
    task_dir.mkdir(parents=False, exist_ok=False)
    task_dir.joinpath("results").mkdir(parents=False, exist_ok=False)

    # logging: one log file per task, stored inside the task directory
    log_name = "task_{}_{}.log".format(task_id, datetime.now().strftime("%m%d%Y_%H%M%S"))
    logging.basicConfig(level=logging.INFO, handlers=[
        logging.FileHandler(str(task_dir.joinpath(log_name)), 'w', encoding='utf-8')])

    # Task create
    try:
        task = create_task(source, task_id, task_dir, task_cfg)
    except Exception as err:
        # do not leave a half-initialised task directory behind
        shutil.rmtree(task_dir)
        raise RuntimeError(f"failed to create task from {source_name}") from err

    # run the task
    task.run()
 
 
 
73
 
74
 
75
 
src/srt_util/srt.py CHANGED
@@ -532,3 +532,27 @@ class SrtScript(object):
532
  f.write(f'{i + idx}\n')
533
  f.write(seg.get_bilingual_str())
534
  pass
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
532
  f.write(f'{i + idx}\n')
533
  f.write(seg.get_bilingual_str())
534
  pass
535
+
536
def split_script(script_in, chunk_size=1000):
    """
    Split a script into chunks of whole sentences.

    Sentences are separated by a blank line (``'\\n\\n'``). Consecutive
    sentences are packed into one chunk while the accumulated length stays
    within ``chunk_size``; a single sentence longer than ``chunk_size`` becomes
    a chunk of its own.

    :param script_in: The whole script as one string.
    :param chunk_size: Soft upper bound on the length of one chunk.
    :return: ``(script_arr, range_arr)`` where ``script_arr[i]`` is the text of
        chunk ``i`` and ``range_arr[i]`` is the 1-based, inclusive
        ``(first, last)`` sentence index range covered by that chunk.
    """
    sentences = script_in.split('\n\n')
    # A script that ends with a blank line yields trailing empty pieces; they
    # are not sentences and must not be counted, whether or not they exist.
    while sentences and not sentences[-1].strip():
        sentences.pop()

    script_arr = []
    range_arr = []
    start = 1
    chunk = ""
    for index, sentence in enumerate(sentences, start=1):
        # Only close a chunk that already holds something, so an oversized
        # first sentence does not produce an empty chunk with range (1, 0).
        if chunk and len(chunk) + len(sentence) + 1 > chunk_size:
            script_arr.append(chunk.strip())
            range_arr.append((start, index - 1))
            start = index
            chunk = ""
        chunk += sentence + '\n\n'

    if chunk:
        script_arr.append(chunk.strip())
        range_arr.append((start, len(sentences)))

    return script_arr, range_arr
src/task.py CHANGED
@@ -11,6 +11,7 @@ import subprocess
11
  from src.srt_util.srt import SrtScript
12
  from src.srt_util.srt2ass import srt2ass
13
  from time import time, strftime, gmtime, sleep
 
14
 
15
  import torch
16
  import stable_whisper
@@ -66,39 +67,50 @@ class Task:
66
  with self.__status_lock:
67
  self.__status = new_status
68
 
69
- def __init__(self, task_id, task_local_dir, launch_info):
70
  self.__status_lock = threading.Lock()
71
  self.__status = TaskStatus.CREATED
72
  openai.api_key = getenv("OPENAI_API_KEY")
73
- self.launch_info = launch_info
74
  self.task_local_dir = task_local_dir
75
- self.model = launch_info["model"]
76
  self.gpu_status = 0
77
- self.output_type = launch_info["output_type"]
 
 
 
78
  self.task_id = task_id
79
- self.progress = NotImplemented
80
  self.SRT_Script = None
81
  self.result = None
82
  self.s_t = None
83
  self.t_e = None
84
 
 
 
 
 
 
 
 
85
 
86
  @staticmethod
87
- def fromYoutubeLink(youtube_url, task_id, launch_info):
88
  # convert to audio
89
- logging.info("Task Creation method: Youtube Link")
90
- local_dump = Path(launch_info['local_dump']) # should get from launch config
91
- return YoutubeTask(task_id, local_dump.joinpath(f"task_{task_id}"), launch_info, youtube_url)
92
 
93
  @staticmethod
94
- def fromAudioFile():
95
- #
96
- return Task(...)
 
97
 
98
  @staticmethod
99
- def fromVideoFile():
100
- # convert to audio
101
- return Task(...)
 
102
 
103
  # Module 1 ASR: audio --> SRT_script
104
  def get_srt_class(self, whisper_model='tiny', method="stable"):
@@ -141,17 +153,18 @@ class Task:
141
  time.sleep(5)
142
  pass
143
 
144
- # Module 2: SRT preprocess: perform preprocess steps
 
145
  def preprocess(self):
146
  self.status = TaskStatus.PRE_PROCESSING
147
  logging.info("--------------------Start Preprocessing SRT class--------------------")
148
  self.SRT_Script.form_whole_sentence()
149
  # self.SRT_Script.spell_check_term()
150
  self.SRT_Script.correct_with_force_term()
151
- processed_srt_path_en = str(Path(self.srt_path).with_suffix('')) + '_processed.srt'
152
  self.SRT_Script.write_srt_file_src(processed_srt_path_en)
153
 
154
- if self.output_type == "ass":
155
  logging.info("write English .srt file to .ass")
156
  assSub_en = srt2ass(processed_srt_path_en)
157
  logging.info('ASS subtitle saved as: ' + assSub_en)
@@ -165,10 +178,9 @@ class Task:
165
 
166
  # Module 3: perform srt translation
167
  def translation(self):
 
 
168
  time.sleep(5)
169
-
170
-
171
-
172
  pass
173
 
174
  # Module 4: perform srt post process steps
@@ -210,7 +222,6 @@ class Task:
210
  def output_render(self):
211
  self.status = TaskStatus.OUTPUT_MODULE
212
  return "TODO"
213
- pass
214
 
215
  def run_pipeline(self):
216
  self.get_srt_class()
@@ -220,33 +231,68 @@ class Task:
220
  self.result = self.output_render()
221
 
222
  class YoutubeTask(Task):
223
- def __init__(self, task_id, task_local_dir, launch_info, youtube_url):
224
- super().__init__(task_id, task_local_dir, launch_info)
225
  self.youtube_url = youtube_url
226
 
227
  def run(self):
228
  yt = YouTube(self.youtube_url)
229
- local_dump = self.task_local_dir
230
  video = yt.streams.filter(progressive=True, file_extension='mp4').order_by('resolution').desc().first()
231
 
232
  if video:
233
- video.download(str(local_dump.joinpath(f"task_{self.task_id}")), filename=f"task_{self.task_id}.mp4")
234
- logging.info(f'Video download completed to {local_dump.joinpath(f"task_{self.task_id}")}!')
235
  else:
236
- raise FileNotFoundError(f"Video stream not found for link {self.youtube_url}")
237
 
238
  audio = yt.streams.filter(only_audio=True).first()
239
  if audio:
240
- audio.download(str(local_dump.joinpath(f"task_{self.task_id}")), filename=f"task_{self.task_id}.mp3")
241
- logging.info(f'Audio download completed to {local_dump.joinpath(f"task_{self.task_id}")}!')
242
  else:
243
- logging.info("download audio failed, using ffmpeg to extract audio")
244
  subprocess.run(
245
- ['ffmpeg', '-i', local_dump.joinpath(f"task_{self.task_id}").joinpath(f"task_{self.task_id}.mp4"), '-f', 'mp3',
246
- '-ab', '192000', '-vn', local_dump.joinpath(f"task_{self.task_id}").joinpath(f"task_{self.task_id}.mp3")])
247
  logging.info("audio extraction finished")
 
 
 
248
 
249
- logging.info("Task Creation Complete.")
250
- logging.info("Task Creation method: Youtube Link")
 
251
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
252
  super().run_pipeline()
 
11
  from src.srt_util.srt import SrtScript
12
  from src.srt_util.srt2ass import srt2ass
13
  from time import time, strftime, gmtime, sleep
14
+ from translation.translation import get_translation, translate
15
 
16
  import torch
17
  import stable_whisper
 
67
  with self.__status_lock:
68
  self.__status = new_status
69
 
70
+ def __init__(self, task_id, task_local_dir, task_cfg):
71
  self.__status_lock = threading.Lock()
72
  self.__status = TaskStatus.CREATED
73
  openai.api_key = getenv("OPENAI_API_KEY")
74
+ self.launch_info = task_cfg # do not use, just for fallback
75
  self.task_local_dir = task_local_dir
76
+ self.model = task_cfg["model"]
77
  self.gpu_status = 0
78
+ self.output_type = task_cfg["output_type"]
79
+ self.target_lang = task_cfg["target_lang"]
80
+ self.source_lang = task_cfg["source_lang"]
81
+ self.field = task_cfg["field"]
82
  self.task_id = task_id
83
+ self.audio_path = None
84
  self.SRT_Script = None
85
  self.result = None
86
  self.s_t = None
87
  self.t_e = None
88
 
89
+ print(f" Task ID: {self.task_id}")
90
+ logging.info(f" Task ID: {self.task_id}")
91
+ logging.info(f" {self.source_lang} -> {self.target_lang} task in {self.field}")
92
+ logging.info(f" Model: \t\t\t{self.model}")
93
+ logging.info(f" subtitle_type: \t\t{self.output_type['subtitle']}")
94
+ logging.info(f" video_ouput: \t\t{self.output_type['video']}")
95
+ logging.info(f" bilingal_ouput: \t{self.output_type['bilingal']}")
96
 
97
  @staticmethod
98
+ def fromYoutubeLink(youtube_url, task_id, task_dir, task_cfg):
99
  # convert to audio
100
+ logging.info(" Task Creation method: Youtube Link")
101
+ return YoutubeTask(task_id, task_dir, task_cfg, youtube_url)
 
102
 
103
  @staticmethod
104
+ def fromAudioFile(audio_path, task_id, task_dir, task_cfg):
105
+ # get audio path
106
+ logging.info(" Task Creation method: Audio File")
107
+ return AudioTask(task_id, task_dir, task_cfg, audio_path)
108
 
109
  @staticmethod
110
+ def fromVideoFile(video_path, task_id, task_dir, task_cfg):
111
+ # get audio path
112
+ logging.info(" Task Creation method: Video File")
113
+ return VideoTask(task_id, task_dir, task_cfg, video_path)
114
 
115
  # Module 1 ASR: audio --> SRT_script
116
  def get_srt_class(self, whisper_model='tiny', method="stable"):
 
153
  time.sleep(5)
154
  pass
155
 
156
+ # Module 2: SRT preprocess: perform preprocess steps
157
+ # TODO: multi-lang and multi-field support according to task_cfg
158
  def preprocess(self):
159
  self.status = TaskStatus.PRE_PROCESSING
160
  logging.info("--------------------Start Preprocessing SRT class--------------------")
161
  self.SRT_Script.form_whole_sentence()
162
  # self.SRT_Script.spell_check_term()
163
  self.SRT_Script.correct_with_force_term()
164
+ processed_srt_path_en = str(Path(self.task_local_dir).with_suffix('')) + '_processed.srt'
165
  self.SRT_Script.write_srt_file_src(processed_srt_path_en)
166
 
167
+ if self.output_type["subtitle"] == "ass":
168
  logging.info("write English .srt file to .ass")
169
  assSub_en = srt2ass(processed_srt_path_en)
170
  logging.info('ASS subtitle saved as: ' + assSub_en)
 
178
 
179
  # Module 3: perform srt translation
180
  def translation(self):
181
+ logging.info("---------------------Start Translation--------------------")
182
+ get_translation(self.srt,self.model, self.video_name, self.video_link)
183
  time.sleep(5)
 
 
 
184
  pass
185
 
186
  # Module 4: perform srt post process steps
 
222
  def output_render(self):
223
  self.status = TaskStatus.OUTPUT_MODULE
224
  return "TODO"
 
225
 
226
  def run_pipeline(self):
227
  self.get_srt_class()
 
231
  self.result = self.output_render()
232
 
233
class YoutubeTask(Task):
    """Task whose video and audio are downloaded from a YouTube URL."""

    def __init__(self, task_id, task_local_dir, task_cfg, youtube_url):
        """
        :param task_id: Identifier of the task.
        :param task_local_dir: Directory of the task; ``joinpath`` is called on it.
        :param task_cfg: Task configuration mapping, forwarded to ``Task``.
        :param youtube_url: URL of the video to download.
        """
        super().__init__(task_id, task_local_dir, task_cfg)
        self.youtube_url = youtube_url

    def run(self):
        """
        Download the video and audio into the task directory, then run the pipeline.

        :raises FileNotFoundError: If no progressive mp4 stream is available.
        :raises subprocess.CalledProcessError: If the ffmpeg fallback fails.
        """
        # Build both target paths once instead of re-deriving them at every use.
        self.video_path = self.task_local_dir.joinpath(f"task_{self.task_id}.mp4")
        self.audio_path = self.task_local_dir.joinpath(f"task_{self.task_id}.mp3")

        yt = YouTube(self.youtube_url)
        video = yt.streams.filter(progressive=True, file_extension='mp4').order_by('resolution').desc().first()
        if not video:
            raise FileNotFoundError(f" Video stream not found for link {self.youtube_url}")
        video.download(str(self.task_local_dir), filename=self.video_path.name)
        logging.info(f'Video Name: {video.default_filename}')

        audio = yt.streams.filter(only_audio=True).first()
        if audio:
            audio.download(str(self.task_local_dir), filename=self.audio_path.name)
        else:
            logging.info(" download audio failed, using ffmpeg to extract audio")
            # check=True: a failed extraction must stop the task here rather
            # than let the pipeline continue with a missing audio file.
            subprocess.run(
                ['ffmpeg', '-i', str(self.video_path), '-f', 'mp3',
                 '-ab', '192000', '-vn', str(self.audio_path)],
                check=True)
            logging.info("audio extraction finished")

        logging.info(f" Video File Dir: {self.video_path}")
        logging.info(f" Audio File Dir: {self.audio_path}")
        logging.info(" Data Prep Complete. Start pipeline")

        super().run_pipeline()
267
+
268
class AudioTask(Task):
    """Task that starts from an audio file supplied by the caller."""

    def __init__(self, task_id, task_local_dir, task_cfg, audio_path):
        """Store the audio path; an audio-only task has no video source."""
        super().__init__(task_id, task_local_dir, task_cfg)
        # TODO: check audio format
        self.video_path = None
        self.audio_path = audio_path

    def run(self):
        """Log the input locations and hand over to the shared pipeline."""
        for message in (
            f" Video File Dir: {self.video_path}",
            f" Audio File Dir: {self.audio_path}",
            "Data Prep Complete. Start pipeline",
        ):
            logging.info(message)
        super().run_pipeline()
280
+
281
class VideoTask(Task):
    """Task that starts from a local video file; audio is extracted with ffmpeg."""

    def __init__(self, task_id, task_local_dir, task_cfg, video_path):
        """
        :param task_id: Identifier of the task.
        :param task_local_dir: Directory of the task; ``joinpath`` is called on it.
        :param task_cfg: Task configuration mapping, forwarded to ``Task``.
        :param video_path: Path of the input video file.
        """
        super().__init__(task_id, task_local_dir, task_cfg)
        # TODO: check video format {.mp4}
        self.video_path = video_path

    def run(self):
        """
        Extract the audio track into the task directory, then run the pipeline.

        :raises subprocess.CalledProcessError: If ffmpeg exits with an error.
        """
        audio_path = self.task_local_dir.joinpath(f"task_{self.task_id}.mp3")

        logging.info("using ffmpeg to extract audio")
        # check=True: without it a failed extraction went unnoticed and the
        # pipeline was started on an audio file that does not exist.
        subprocess.run(
            ['ffmpeg', '-i', str(self.video_path), '-f', 'mp3',
             '-ab', '192000', '-vn', str(audio_path)],
            check=True)
        logging.info("audio extraction finished")

        self.audio_path = audio_path
        logging.info(f" Video File Dir: {self.video_path}")
        logging.info(f" Audio File Dir: {self.audio_path}")
        logging.info("Data Prep Complete. Start pipeline")
        super().run_pipeline()
src/translation/LLM_task.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import openai
3
+
4
+
5
def LLM_task(model_name, input, task, temp = 0.15):
    """
    Translates input sentence with desired LLM.

    :param model_name: The name of the translation model to be used.
    :param input: Sentence for translation.
    :param task: Prompt.
    :param temp: Model temperature.
    :return: The content of the first returned choice, stripped of
        surrounding whitespace.
    :raises NotImplementedError: If ``model_name`` is not a supported model.
    """
    if model_name in ("gpt-3.5-turbo", "gpt-4"):
        response = openai.ChatCompletion.create(
            model=model_name,
            messages=[
                {"role": "system", "content": task},
                {"role": "user", "content": input}
            ],
            temperature=temp
        )
        return response['choices'][0]['message']['content'].strip()
    # Other LLM not implemented; name the rejected model so the caller can
    # tell a configuration mistake from a transient failure.
    raise NotImplementedError(f"LLM_task does not support model '{model_name}'")
src/translation/translation.py ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from os import getenv
2
+ import logging
3
+ from time import sleep
4
+ from tqdm import tqdm
5
+ from src.srt_util.srt import split_script
6
+ from LLM_task import LLM_task
7
+
8
def get_translation(srt,model,video_name,video_link):
    """
    Split the script into chunks and translate them.

    NOTE(review): ``srt`` is handed to ``split_script`` (which calls
    ``.split`` on it) and also to ``translate`` (which calls
    ``srt.set_translation``) -- confirm which type is intended.

    :param srt: The script object passed on to both helpers.
    :param model: The name of the translation model to be used.
    :param video_name: The name of the video.
    :param video_link: The link to the video.
    """
    chunks_and_ranges = split_script(srt)
    translate(srt, chunks_and_ranges[0], chunks_and_ranges[1], model, video_name, video_link)
12
+
13
def check_translation(sentence, translation):
    """
    Check for the merge sentence issue in a translation.

    Both texts must contain the same number of blank-line separated
    sentences, i.e. the same number of ``'\\n\\n'`` separators.

    :param sentence: The original text.
    :param translation: The translated text.
    :return: True when the sentence counts match, False otherwise.
    """
    separator = '\n\n'
    return sentence.count(separator) == translation.count(separator)
26
+
27
+
28
def _translate_chunk(model_name, chunk, range_, task, temp, attempts_count):
    """Translate one chunk, falling back to sentence-by-sentence translation when the sentence count never matches."""
    translation = LLM_task(model_name, chunk, task, temp)
    # detect merge sentence issue and retry up to attempts_count times
    for _ in range(attempts_count):
        if check_translation(chunk, translation):
            return translation
        translation = LLM_task(model_name, chunk, task, temp)
    if check_translation(chunk, translation):
        return translation

    # if failure still happens, translate every sentence on its own
    logging.info("merge sentence issue found for range %s", range_)
    translation = "\n\n".join(
        LLM_task(model_name, single_sentence, task, temp)
        for single_sentence in chunk.split("\n\n")
    )
    logging.info("solved by individually translation!")
    return translation


def translate(srt, script_arr, range_arr, model_name, video_name, video_link, attempts_count=5, task=None, temp = 0.15):
    """
    Translates the given script array into another language using the chatgpt and writes to the SRT file.

    The function walks through the chunks in ``script_arr`` together with
    their ranges in ``range_arr``. Each chunk is translated as a whole; if the
    translation does not keep the number of sentences after ``attempts_count``
    retries, the chunk is translated sentence by sentence instead. A failing
    request is retried after 30 seconds, without a limit on the number of retries.

    :param srt: An instance of the Subtitle class representing the SRT file.
    :param script_arr: A list of strings representing the original script sentences to be translated.
    :param range_arr: A list of tuples representing the start and end positions of sentences in the script.
    :param model_name: The name of the translation model to be used.
    :param video_name: The name of the video.
    :param video_link: The link to the video.
    :param attempts_count: Number of retries allowed per chunk for unmatched sentences.
    :param task: Prompt.
    :param temp: Model temperature.
    :raises NotImplementedError: If ``model_name`` is not supported by ``LLM_task``.
    """
    if task is None:
        task = "你是一个翻译助理,你的任务是翻译星际争霸视频,你会被提供一个按行分割的英文段落,你需要在保证句意和行数的情况下输出翻译后的文本。"

    total = min(len(script_arr), len(range_arr))
    for sentence, range_ in tqdm(zip(script_arr, range_arr), total=total):
        chunk_range = (range_[0], range_[1])
        # using chatgpt model
        print(f"now translating sentences {chunk_range}")
        while True:
            try:
                # every chunk gets its own retry budget
                translation = _translate_chunk(model_name, sentence, chunk_range, task, temp, attempts_count)
                break
            except NotImplementedError:
                # an unsupported model never succeeds; retrying would loop forever
                raise
            except Exception as e:
                logging.debug("An error has occurred during translation: %s", e)
                print("An error has occurred during translation:", e)
                print("Retrying... the script will continue after 30 seconds.")
                sleep(30)

        srt.set_translation(translation, chunk_range, model_name, video_name, video_link)