CanYing0913 committed
Commit 7d74f8e · 1 Parent(s): cba75b6
Update srt.py and file hierarchy
Former-commit-id: d36b43736cb3447da3e26e3caef1e351bf431dc3
- doc/Installation.md +7 -0
- doc/struct.md +7 -0
- pipeline.py +5 -5
- srt_util/__init__.py +0 -0
- SRT.py → srt_util/srt.py +16 -23
- srt2ass.py → srt_util/srt2ass.py +0 -0
doc/Installation.md
ADDED
@@ -0,0 +1,7 @@
+### **Recommended:**
+We recommend you to configure your environment using [mamba](https://pypi.org/project/mamba/). The following packages are required:
+```
+openai
+openai-whisper
+
+```
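Editor's note (not part of the commit): a minimal sketch for checking that the two listed packages are importable in the configured environment; the import names `openai` and `whisper` follow the packages' usual layout and are assumptions here.

```python
# Minimal environment check for the packages listed in doc/Installation.md.
from importlib.metadata import version

import openai            # the openai package
import whisper           # installed via the openai-whisper package

print("openai:", version("openai"))
print("openai-whisper:", version("openai-whisper"))
print("whisper.load_model available:", hasattr(whisper, "load_model"))
```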
doc/struct.md
ADDED
@@ -0,0 +1,7 @@
+# Structure of Repository
+```
+├── doc                 # Baseline implementation of SpMM algorithm.
+├────── struct.md       # Document of repository structure.
+├── finetune_data       #
+└── README.md
+```
pipeline.py
CHANGED
@@ -3,10 +3,10 @@ from pytube import YouTube
 import argparse
 import os
 from tqdm import tqdm
-from SRT import SRT_script
+from srt_util.srt import SrtScript
 import stable_whisper
 import whisper
-from srt2ass import srt2ass
+from srt_util.srt2ass import srt2ass

 import subprocess

@@ -85,7 +85,7 @@ def get_sources(args, download_path, result_path, video_name):
 def get_srt_class(srt_file_en, result_path, video_name, audio_path, audio_file = None, whisper_model = 'large', method = "stable"):
     # Instead of using the script_en variable directly, we'll use script_input
     if srt_file_en is not None:
-        srt = SRT_script.parse_from_srt_file(srt_file_en)
+        srt = SrtScript.parse_from_srt_file(srt_file_en)
     else:
         # using whisper to perform speech-to-text and save it in <video name>_en.txt under RESULT PATH.
         srt_file_en = "{}/{}/{}_en.srt".format(result_path, video_name, video_name)
@@ -115,10 +115,10 @@ def get_srt_class(srt_file_en, result_path, video_name, audio_path, audio_file =
             else:
                 raise ValueError("invalid speech to text method")

-            srt = SRT_script(transcript['segments'])  # read segments to SRT class
+            srt = SrtScript(transcript['segments'])  # read segments to SRT class

         else:
-            srt = SRT_script.parse_from_srt_file(srt_file_en)
+            srt = SrtScript.parse_from_srt_file(srt_file_en)
     return srt_file_en, srt

 # Split the video script by sentences and create chunks within the token limit
srt_util/__init__.py
ADDED
File without changes
SRT.py → srt_util/srt.py
RENAMED
@@ -7,7 +7,7 @@ from datetime import timedelta
 import openai


-class SRT_segment(object):
+class SrtSegment(object):
     def __init__(self, *args) -> None:
         if isinstance(args[0], dict):
             segment = args[0]
@@ -63,28 +63,23 @@ class SRT_segment(object):
         self.end = seg.end
         self.end_ms = seg.end_ms
         self.duration = f"{self.start_time_str} --> {self.end_time_str}"
-        pass

     def __add__(self, other):
         """
         Merge the segment seg with the current segment, and return the new constructed segment.
         No in-place modification.
+        This is used for '+' operator.
         :param other: Another segment that is strictly next to added segment.
         :return: new segment of the two sub-segments
         """
         # assert other.start_ms == self.end_ms, f"cannot merge discontinuous segments."
         result = deepcopy(self)
-        result.source_text += f' {other.source_text}'
-        result.translation += f' {other.translation}'
-        result.end_time_str = other.end_time_str
-        result.end = other.end
-        result.end_ms = other.end_ms
-        result.duration = f"{self.start_time_str} --> {self.end_time_str}"
+        result.merge_seg(other)
         return result

-    def remove_trans_punc(self):
+    def remove_trans_punc(self) -> None:
         """
-        remove punctuations in translation text
+        remove CN punctuations in translation text
         :return: None
         """
         punc_cn = "，。！？"
@@ -101,12 +96,9 @@ class SRT_segment(object):
         return f'{self.duration}\n{self.source_text}\n{self.translation}\n\n'


-class SRT_script():
+class SrtScript(object):
     def __init__(self, segments) -> None:
-        self.segments = []
-        for seg in segments:
-            srt_seg = SRT_segment(seg)
-            self.segments.append(srt_seg)
+        self.segments = [SrtSegment(seg) for seg in segments]

     @classmethod
     def parse_from_srt_file(cls, path: str):
@@ -114,13 +106,12 @@ class SRT_script():
             script_lines = [line.rstrip() for line in f.readlines()]

         segments = []
-        for i in range(len(script_lines)):
-            if i % 4 == 0:
-                segments.append(list(script_lines[i:i + 4]))
+        for i in range(0, len(script_lines), 4):
+            segments.append(list(script_lines[i:i + 4]))

         return cls(segments)

-    def merge_segs(self, idx_list) -> SRT_segment:
+    def merge_segs(self, idx_list) -> SrtSegment:
         """
         Merge entire segment list to a single segment
         :param idx_list: List of index to merge
@@ -145,6 +136,7 @@ class SRT_script():
         """
         merge_list = []  # a list of indices that should be merged e.g. [[0], [1, 2, 3, 4], [5, 6], [7]]
         sentence = []
+        # Get each entire sentence of distinct segments, fill indices to merge_list
         for i, seg in enumerate(self.segments):
             if seg.source_text[-1] in ['.', '!', '?'] and len(seg.source_text) > 10 and 'vs.' not in seg.source_text:
                 sentence.append(i)
@@ -153,6 +145,7 @@ class SRT_script():
             else:
                 sentence.append(i)

+        # Reconstruct segments, each with an entire sentence
         segments = []
         for idx_list in merge_list:
             segments.append(self.merge_segs(idx_list))
@@ -327,14 +320,14 @@ class SRT_script():
         seg1_dict['text'] = src_seg1
         seg1_dict['start'] = start_seg1
         seg1_dict['end'] = end_seg1
-        seg1 = SRT_segment(seg1_dict)
+        seg1 = SrtSegment(seg1_dict)
         seg1.translation = trans_seg1

         seg2_dict = {}
         seg2_dict['text'] = src_seg2
         seg2_dict['start'] = start_seg2
         seg2_dict['end'] = end_seg2
-        seg2 = SRT_segment(seg2_dict)
+        seg2 = SrtSegment(seg2_dict)
         seg2.translation = trans_seg2

         result_list = []
@@ -386,7 +379,7 @@ class SRT_script():
         ## force term correction

         # load term dictionary
-        with open("finetune_data/dict_enzh.csv", 'r', encoding='utf-8') as f:
+        with open("../finetune_data/dict_enzh.csv", 'r', encoding='utf-8') as f:
             term_enzh_dict = {rows[0]: rows[1] for rows in reader(f)}

         # change term
@@ -455,7 +448,7 @@ class SRT_script():
             pos = uncover(word)[1]
             new_word = word
             if arg == 0:  # term translate mode
-                with open("finetune_data/dict_enzh.csv", 'r', encoding='utf-8') as f:
+                with open("../finetune_data/dict_enzh.csv", 'r', encoding='utf-8') as f:
                     term_enzh_dict = {rows[0]: rows[1] for rows in reader(f)}
                 if real_word in term_enzh_dict:
                     new_word = word.replace(word[:pos], term_enzh_dict.get(real_word))
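Editor's note (not part of the diff): a hypothetical usage sketch of the renamed classes. It assumes whisper-style segment dicts with 'text', 'start' and 'end' keys, as implied by the seg1_dict/seg2_dict construction above; the example strings and paths are invented.

```python
from srt_util.srt import SrtScript, SrtSegment

# Build two adjacent segments from whisper-style dicts (keys as in seg1_dict above).
seg_a = SrtSegment({'text': 'Hello there,', 'start': 0.0, 'end': 1.2})
seg_b = SrtSegment({'text': 'and welcome back.', 'start': 1.2, 'end': 2.5})

# '+' deep-copies the left segment and merges the right one via merge_seg().
merged = seg_a + seg_b

# Translations are attached after construction, as done for seg1/seg2 above.
merged.translation = "你好，欢迎回来。"
merged.remove_trans_punc()  # strips the CN punctuation "，。！？"

# A whole script can be built from a list of segment dicts,
# or parsed back from an existing .srt file (hypothetical path).
script = SrtScript([{'text': 'Hello there,', 'start': 0.0, 'end': 1.2}])
# script = SrtScript.parse_from_srt_file("results/demo/demo_en.srt")
```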
srt2ass.py → srt_util/srt2ass.py
RENAMED
File without changes