DWizard committed on
Commit
e4c138e
·
1 Parent(s): 6041d53

add pre/post process config

Browse files

Former-commit-id: 3538f88e87cedf5a1ff995b4dc73754b9951fe4d

Files changed (2) hide show
  1. configs/task_config.yaml +7 -1
  2. src/task.py +10 -4
configs/task_config.yaml CHANGED
@@ -8,4 +8,10 @@ output_type:
8
  source_lang: EN
9
  target_lang: ZH
10
  field: SC2
11
- chunk_size: 1000
 
 
 
 
 
 
 
8
  source_lang: EN
9
  target_lang: ZH
10
  field: SC2
11
+ chunk_size: 1000
12
+ pre_process:
13
+ ON: True
14
+ spell_check: False
15
+ term_correct: True
16
+ post_process:
17
+ ON: True
src/task.py CHANGED
@@ -79,6 +79,8 @@ class Task:
79
  self.target_lang = task_cfg["target_lang"]
80
  self.source_lang = task_cfg["source_lang"]
81
  self.field = task_cfg["field"]
 
 
82
  self.task_id = task_id
83
  self.audio_path = None
84
  self.SRT_Script = None
@@ -155,8 +157,10 @@ class Task:
155
  self.status = TaskStatus.PRE_PROCESSING
156
  logging.info("--------------------Start Preprocessing SRT class--------------------")
157
  self.SRT_Script.form_whole_sentence()
158
- # self.SRT_Script.spell_check_term()
159
- self.SRT_Script.correct_with_force_term()
 
 
160
  processed_srt_path_src = str(Path(self.task_local_dir) / f'{self.task_id}_processed.srt')
161
  self.SRT_Script.write_srt_file_src(processed_srt_path_src)
162
 
@@ -221,9 +225,11 @@ class Task:
221
 
222
  def run_pipeline(self):
223
  self.get_srt_class()
224
- self.preprocess()
 
225
  self.translation()
226
- self.postprocess()
 
227
  self.result = self.output_render()
228
  print(self.result)
229
 
 
79
  self.target_lang = task_cfg["target_lang"]
80
  self.source_lang = task_cfg["source_lang"]
81
  self.field = task_cfg["field"]
82
+ self.pre_setting = task_cfg["pre_process"]
83
+ self.post_setting = task_cfg["post_process"]
84
  self.task_id = task_id
85
  self.audio_path = None
86
  self.SRT_Script = None
 
157
  self.status = TaskStatus.PRE_PROCESSING
158
  logging.info("--------------------Start Preprocessing SRT class--------------------")
159
  self.SRT_Script.form_whole_sentence()
160
+ if self.pre_setting["spell_check"]:
161
+ self.SRT_Script.spell_check_term()
162
+ if self.pre_setting["term_correct"]:
163
+ self.SRT_Script.correct_with_force_term()
164
  processed_srt_path_src = str(Path(self.task_local_dir) / f'{self.task_id}_processed.srt')
165
  self.SRT_Script.write_srt_file_src(processed_srt_path_src)
166
 
 
225
 
226
  def run_pipeline(self):
227
  self.get_srt_class()
228
+ if self.pre_setting["ON"]:
229
+ self.preprocess()
230
  self.translation()
231
+ if self.post_setting["ON"]:
232
+ self.postprocess()
233
  self.result = self.output_render()
234
  print(self.result)
235