File size: 5,223 Bytes
c8d5c10
90b9f89
c712205
 
 
 
 
 
 
 
 
 
 
 
 
 
ec27e8c
c712205
 
 
 
 
 
 
 
 
 
a1258b5
c712205
 
 
 
 
 
 
 
 
 
 
 
91c078f
 
 
 
c712205
 
91c078f
c712205
 
 
91c078f
 
c712205
91c078f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fa7018d
91c078f
fa7018d
91c078f
 
 
fa7018d
 
91c078f
 
fa7018d
91c078f
 
 
 
 
c712205
fa7018d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a1258b5
fa7018d
 
 
 
 
 
189e4cc
c712205
 
 
a1258b5
c712205
 
fa7018d
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
import os
import re
from openai import OpenAI

client = OpenAI(api_key=os.environ.get('OPENAI_KEY'))
from tqdm import tqdm
import argparse

# The OpenAI API key is read from the OPENAI_KEY environment variable when `client` is created above.

# Target languages accepted by the --languages option; all of them are used by default.
languages = [
    "Spanish",
    "French",
    "German",
    "Italian",
    "Portuguese",
    "Arabic",
    "Japanese",
    "Indonesian",
    "Swedish",
    "Danish",
    "Korean",
    "Polish",
    "Thai",
    "Chinese",
    "Vietnamese",
]


def read_sbv_file(file_path):
    """
    Read an SBV subtitle file and return its lines.

    Args:
        file_path (str): Path to the SBV file to read.

    Returns:
        list: The lines of the file, each keeping its trailing newline.
    """
    # 'utf-8-sig' decodes plain UTF-8 and additionally drops a leading
    # byte-order mark. With plain 'utf-8' the BOM stays glued to the first
    # line, so the '^'-anchored timestamp pattern used when parsing would not
    # match and the first caption would be silently skipped.
    with open(file_path, 'r', encoding='utf-8-sig') as file:
        return file.readlines()

def write_sbv_file(file_path, lines):
    """
    Write subtitle lines to a UTF-8 encoded file, replacing any existing content.

    Args:
        file_path (str): Path of the file to write.
        lines (list): Strings to write in order; no separators are added.
    """
    content = ''.join(lines)
    with open(file_path, 'w', encoding='utf-8') as handle:
        handle.write(content)

def translate_text(text, target_language):
    """
    Translate a piece of text with the OpenAI chat completions API.

    Args:
        text (str): Text to translate; sent as the user message.
        target_language (str): Language named in the system prompt.

    Returns:
        The message content of the first choice in the API response.
    """
    system_prompt = f"You are a professional translator. Translate the following text to {target_language}. Preserve the original formatting and line breaks."
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": text},
    ]
    completion = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=conversation,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content



def get_base_filename(input_file):
    """
    Return the file name of a path without its directory or last extension.

    Args:
        input_file (str): Path to a file.

    Returns:
        str: The final path component with its last extension removed.
    """
    file_name = os.path.basename(input_file)
    stem, _extension = os.path.splitext(file_name)
    return stem

def create_output_directory(output_dir):
    """
    Create the output directory, including missing parents.

    An already existing directory is not an error.

    Args:
        output_dir (str): Path of the directory to create.
    """
    os.makedirs(name=output_dir, exist_ok=True)

def parse_sbv_file(lines):
    """
    Split the lines of an SBV file into caption blocks.

    Args:
        lines (list): Lines of the SBV file.

    Returns:
        list: One dict per caption with the keys 'timestamp' and 'text'.
    """
    captions = []
    cursor = 0
    while cursor < len(lines):
        # parse_block always advances the cursor, whether or not it found a caption.
        stamp, body, cursor = parse_block(lines, cursor)
        if not stamp:
            continue
        captions.append({'timestamp': stamp, 'text': body})
    return captions

def parse_block(lines, i):
    """
    Try to parse one caption block starting at line index i.

    Args:
        lines (list): Lines of the SBV file.
        i (int): Index of the line to inspect.

    Returns:
        tuple: (timestamp, text, next_index) when line i is a timestamp line,
        otherwise (None, None, i + 1).
    """
    candidate = lines[i].strip()
    is_timestamp = re.match(r'^\d{1,2}:\d{2}:\d{2}\.\d{3},\d{1,2}:\d{2}:\d{2}\.\d{3}$', candidate)
    if not is_timestamp:
        # Not the start of a caption: skip this single line.
        return None, None, i + 1
    caption_text, next_index = parse_text_lines(lines, i + 1)
    return candidate, caption_text, next_index

def parse_text_lines(lines, i):
    text_lines = []
    while i < len(lines) and lines[i].strip() != '':
        text_lines.append(lines[i])
        i += 1
    while i < len(lines) and lines[i].strip() == '':
        i += 1
    return ''.join(text_lines).strip(), i

def translate_blocks(blocks, target_language):
    """
    Translate every caption block, showing a progress bar.

    Args:
        blocks (list): Caption dicts with the keys 'timestamp' and 'text'.
        target_language (str): Language to translate into.

    Returns:
        list: The translated caption dicts, in the same order as the input.
    """
    progress = tqdm(blocks, desc=f"Translating blocks for {target_language}")
    translated = []
    for entry in progress:
        translated.append(translate_block(entry, target_language))
    return translated

def translate_block(block, target_language):
    """
    Translate the text of one caption block, keeping its timestamp.

    Args:
        block (dict): Caption with the keys 'timestamp' and 'text'.
        target_language (str): Language to translate into.

    Returns:
        dict: A new caption dict with the original timestamp and the
        translated text; the text is '' when the input text is empty.
    """
    source_text = block['text']
    if source_text:
        rendered = translate_text(source_text, target_language)
    else:
        # Nothing to translate, so no API call is made.
        rendered = ''
    return {'timestamp': block['timestamp'], 'text': rendered}

def reconstruct_sbv_content(blocks):
    """
    Turn caption blocks back into the lines of an SBV file.

    Each block contributes its timestamp line, its text line when the text is
    non-empty, and a blank separator line.

    Args:
        blocks (list): Caption dicts with the keys 'timestamp' and 'text'.

    Returns:
        list: Newline-terminated strings ready to be written to a file.
    """
    rendered = []
    for entry in blocks:
        piece = [entry['timestamp'] + '\n']
        caption = entry['text']
        if caption:
            piece.append(caption + '\n')
        piece.append('\n')
        rendered.extend(piece)
    return rendered

def get_output_file_path(output_dir, base_filename, lang):
    """
    Build the path of the translated file for one language.

    Args:
        output_dir (str): Directory that will hold the file.
        base_filename (str): Name of the input file without its extension.
        lang (str): Target language, appended to the file name.

    Returns:
        str: Path of the form '<output_dir>/<base_filename>_<lang>.sbv'.
    """
    file_name = f"{base_filename}_{lang}.sbv"
    return os.path.join(output_dir, file_name)

def translate_to_language(input_file, output_dir, language):
    """
    Translate one SBV subtitle file into a single target language.

    The input is read and parsed into caption blocks, each block is
    translated, and the result is written to the output directory under a
    name derived from the input file name and the language.

    Args:
        input_file (str): Path to the input SBV file.
        output_dir (str): Directory where the translated file is saved;
            created if it does not exist.
        language (str): Target language for translation.

    Returns:
        str: Path to the translated output file.
    """
    source_lines = read_sbv_file(input_file)
    stem = get_base_filename(input_file)
    create_output_directory(output_dir)
    captions = parse_sbv_file(source_lines)

    print(f"\nStarting translation for language: {language}")
    rendered_lines = reconstruct_sbv_content(translate_blocks(captions, language))

    output_file = get_output_file_path(output_dir, stem, language)
    write_sbv_file(output_file, rendered_lines)
    print(f"Translated {language} file saved: {output_file}")
    return output_file

def translate_to_languages(input_file, output_dir, selected_languages):
    """
    Translate one SBV subtitle file into several languages, one after another.

    Args:
        input_file (str): Path to the input SBV file.
        output_dir (str): Directory where the translated files are saved;
            created if it does not exist.
        selected_languages (list): Target languages for translation.

    Returns:
        list: Paths to the translated output files, in the order of
        selected_languages.
    """
    create_output_directory(output_dir)
    progress = tqdm(selected_languages, desc="Translating languages")
    return [
        translate_to_language(input_file, output_dir, name)
        for name in progress
    ]

# Command-line entry point: parse the arguments and translate the input file into each selected language.
if __name__ == "__main__":
    # Describe the command line: two positional paths plus an optional
    # language selection restricted to the supported languages.
    cli = argparse.ArgumentParser(
        description="Translate SBV subtitle files to multiple languages.",
    )
    cli.add_argument("input_file", help="Path to the input .sbv file")
    cli.add_argument("output_dir", help="Path to the output directory")
    cli.add_argument(
        "--languages",
        nargs="+",
        choices=languages,
        default=languages,
        help="Languages to translate to",
    )
    options = cli.parse_args()

    created_files = translate_to_languages(
        options.input_file,
        options.output_dir,
        options.languages,
    )
    print(f"\nTranslation completed. {len(created_files)} files created.")