Spaces:
Sleeping
Sleeping
Eason Lu
committed on
Commit
·
cd90680
1
Parent(s):
d658831
modify logging
Browse files
Former-commit-id: 58f686a3649affdfb51a17120dc6697cac93e637
- src/task.py +15 -12
src/task.py
CHANGED
@@ -11,8 +11,6 @@ import subprocess
|
|
11 |
from src.srt_util.srt import SrtScript
|
12 |
from src.srt_util.srt2ass import srt2ass
|
13 |
|
14 |
-
|
15 |
-
|
16 |
"""
|
17 |
Youtube link
|
18 |
- link
|
@@ -85,25 +83,25 @@ class Task:
|
|
85 |
logging.info(f" {self.source_lang} -> {self.target_lang} task in {self.field}")
|
86 |
logging.info(f" Model: {self.model}")
|
87 |
logging.info(f" subtitle_type: {self.output_type['subtitle']}")
|
88 |
-
logging.info(f" video_ouput:
|
89 |
-
logging.info(f" bilingal_ouput:
|
90 |
|
91 |
@staticmethod
|
92 |
def fromYoutubeLink(youtube_url, task_id, task_dir, task_cfg):
|
93 |
# convert to audio
|
94 |
-
logging.info("Task Creation method: Youtube Link")
|
95 |
return YoutubeTask(task_id, task_dir, task_cfg, youtube_url)
|
96 |
|
97 |
@staticmethod
|
98 |
def fromAudioFile(audio_path, task_id, task_dir, task_cfg):
|
99 |
# get audio path
|
100 |
-
logging.info("Task Creation method: Audio File")
|
101 |
return AudioTask(task_id, task_dir, task_cfg, audio_path)
|
102 |
|
103 |
@staticmethod
|
104 |
def fromVideoFile(video_path, task_id, task_dir, task_cfg):
|
105 |
# get audio path
|
106 |
-
logging.info("Task Creation method: Video File")
|
107 |
return VideoTask(task_id, task_dir, task_cfg, video_path)
|
108 |
|
109 |
# Module 1 ASR: audio --> SRT_script
|
@@ -172,16 +170,15 @@ class YoutubeTask(Task):
|
|
172 |
if video:
|
173 |
video.download(str(self.task_local_dir), filename=f"task_{self.task_id}.mp4")
|
174 |
logging.info(f'Video Name: {video.default_filename}')
|
175 |
-
logging.info(f'Video download completed to {self.task_local_dir}!')
|
176 |
else:
|
177 |
-
raise FileNotFoundError(f"Video stream not found for link {self.youtube_url}")
|
178 |
|
179 |
audio = yt.streams.filter(only_audio=True).first()
|
180 |
if audio:
|
181 |
audio.download(str(self.task_local_dir), filename=f"task_{self.task_id}.mp3")
|
182 |
-
logging.info(f'Audio download completed to {self.task_local_dir}!')
|
183 |
else:
|
184 |
-
logging.info("download audio failed, using ffmpeg to extract audio")
|
185 |
subprocess.run(
|
186 |
['ffmpeg', '-i', self.task_local_dir.joinpath(f"task_{self.task_id}.mp4"), '-f', 'mp3',
|
187 |
'-ab', '192000', '-vn', self.task_local_dir.joinpath(f"task_{self.task_id}.mp3")])
|
@@ -190,7 +187,9 @@ class YoutubeTask(Task):
|
|
190 |
self.video_path = self.task_local_dir.joinpath(f"task_{self.task_id}.mp4")
|
191 |
self.audio_path = self.task_local_dir.joinpath(f"task_{self.task_id}.mp3")
|
192 |
|
193 |
-
logging.info("
|
|
|
|
|
194 |
|
195 |
super().run_pipeline()
|
196 |
|
@@ -202,6 +201,8 @@ class AudioTask(Task):
|
|
202 |
self.video_path = None
|
203 |
|
204 |
def run(self):
|
|
|
|
|
205 |
logging.info("Data Prep Complete. Start pipeline")
|
206 |
super().run_pipeline()
|
207 |
|
@@ -219,5 +220,7 @@ class VideoTask(Task):
|
|
219 |
logging.info("audio extraction finished")
|
220 |
|
221 |
self.audio_path = self.task_local_dir.joinpath(f"task_{self.task_id}.mp3")
|
|
|
|
|
222 |
logging.info("Data Prep Complete. Start pipeline")
|
223 |
super().run_pipeline()
|
|
|
11 |
from src.srt_util.srt import SrtScript
|
12 |
from src.srt_util.srt2ass import srt2ass
|
13 |
|
|
|
|
|
14 |
"""
|
15 |
Youtube link
|
16 |
- link
|
|
|
83 |
logging.info(f" {self.source_lang} -> {self.target_lang} task in {self.field}")
|
84 |
logging.info(f" Model: {self.model}")
|
85 |
logging.info(f" subtitle_type: {self.output_type['subtitle']}")
|
86 |
+
logging.info(f" video_ouput: {self.output_type['video']}")
|
87 |
+
logging.info(f" bilingal_ouput: {self.output_type['bilingal']}")
|
88 |
|
89 |
@staticmethod
|
90 |
def fromYoutubeLink(youtube_url, task_id, task_dir, task_cfg):
|
91 |
# convert to audio
|
92 |
+
logging.info(" Task Creation method: Youtube Link")
|
93 |
return YoutubeTask(task_id, task_dir, task_cfg, youtube_url)
|
94 |
|
95 |
@staticmethod
|
96 |
def fromAudioFile(audio_path, task_id, task_dir, task_cfg):
|
97 |
# get audio path
|
98 |
+
logging.info(" Task Creation method: Audio File")
|
99 |
return AudioTask(task_id, task_dir, task_cfg, audio_path)
|
100 |
|
101 |
@staticmethod
|
102 |
def fromVideoFile(video_path, task_id, task_dir, task_cfg):
|
103 |
# get audio path
|
104 |
+
logging.info(" Task Creation method: Video File")
|
105 |
return VideoTask(task_id, task_dir, task_cfg, video_path)
|
106 |
|
107 |
# Module 1 ASR: audio --> SRT_script
|
|
|
170 |
if video:
|
171 |
video.download(str(self.task_local_dir), filename=f"task_{self.task_id}.mp4")
|
172 |
logging.info(f'Video Name: {video.default_filename}')
|
|
|
173 |
else:
|
174 |
+
raise FileNotFoundError(f" Video stream not found for link {self.youtube_url}")
|
175 |
|
176 |
audio = yt.streams.filter(only_audio=True).first()
|
177 |
if audio:
|
178 |
audio.download(str(self.task_local_dir), filename=f"task_{self.task_id}.mp3")
|
179 |
+
# logging.info(f'Audio download completed to {self.task_local_dir}!')
|
180 |
else:
|
181 |
+
logging.info(" download audio failed, using ffmpeg to extract audio")
|
182 |
subprocess.run(
|
183 |
['ffmpeg', '-i', self.task_local_dir.joinpath(f"task_{self.task_id}.mp4"), '-f', 'mp3',
|
184 |
'-ab', '192000', '-vn', self.task_local_dir.joinpath(f"task_{self.task_id}.mp3")])
|
|
|
187 |
self.video_path = self.task_local_dir.joinpath(f"task_{self.task_id}.mp4")
|
188 |
self.audio_path = self.task_local_dir.joinpath(f"task_{self.task_id}.mp3")
|
189 |
|
190 |
+
logging.info(f" Video File Dir: {self.video_path}")
|
191 |
+
logging.info(f" Audio File Dir: {self.audio_path}")
|
192 |
+
logging.info(" Data Prep Complete. Start pipeline")
|
193 |
|
194 |
super().run_pipeline()
|
195 |
|
|
|
201 |
self.video_path = None
|
202 |
|
203 |
def run(self):
|
204 |
+
logging.info(f" Video File Dir: {self.video_path}")
|
205 |
+
logging.info(f" Audio File Dir: {self.audio_path}")
|
206 |
logging.info("Data Prep Complete. Start pipeline")
|
207 |
super().run_pipeline()
|
208 |
|
|
|
220 |
logging.info("audio extraction finished")
|
221 |
|
222 |
self.audio_path = self.task_local_dir.joinpath(f"task_{self.task_id}.mp3")
|
223 |
+
logging.info(f" Video File Dir: {self.video_path}")
|
224 |
+
logging.info(f" Audio File Dir: {self.audio_path}")
|
225 |
logging.info("Data Prep Complete. Start pipeline")
|
226 |
super().run_pipeline()
|