Spaces:
Configuration error
Configuration error
File size: 5,223 Bytes
c8d5c10 90b9f89 c712205 ec27e8c c712205 a1258b5 c712205 91c078f c712205 91c078f c712205 91c078f c712205 91c078f fa7018d 91c078f fa7018d 91c078f fa7018d 91c078f fa7018d 91c078f c712205 fa7018d a1258b5 fa7018d 189e4cc c712205 a1258b5 c712205 fa7018d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 |
import os
import re
from openai import OpenAI
# Module-level OpenAI client used by the translation helpers in this file;
# the API key is taken from the OPENAI_KEY environment variable.
client = OpenAI(api_key=os.environ.get('OPENAI_KEY'))
from tqdm import tqdm
import argparse
# The OpenAI API key is read from the OPENAI_KEY environment variable when the
# client is created.
#
# Target languages offered for translation; also used as the allowed values
# and the default for the --languages command-line option.
languages = [
    "Spanish",
    "French",
    "German",
    "Italian",
    "Portuguese",
    "Arabic",
    "Japanese",
    "Indonesian",
    "Swedish",
    "Danish",
    "Korean",
    "Polish",
    "Thai",
    "Chinese",
    "Vietnamese",
]
def read_sbv_file(file_path):
    """Read an SBV subtitle file and return its lines.

    Args:
        file_path (str): Path to the SBV file to read.

    Returns:
        list[str]: The file's lines, each keeping its trailing newline.
    """
    # 'utf-8-sig' decodes plain UTF-8 exactly like 'utf-8' but also discards a
    # leading byte-order mark. With plain 'utf-8' the BOM stays glued to the
    # first line, the timestamp pattern no longer matches it, and the first
    # caption is silently lost.
    with open(file_path, 'r', encoding='utf-8-sig') as file:
        return file.readlines()
def write_sbv_file(file_path, lines):
    """Write the given lines to an SBV file as UTF-8, replacing any existing file.

    Args:
        file_path (str): Destination path.
        lines (list[str]): Strings to write back to back; no separators are
            added, so each entry must carry its own newline.
    """
    payload = ''.join(lines)
    with open(file_path, 'w', encoding='utf-8') as handle:
        handle.write(payload)
def translate_text(text, target_language):
    """Translate a piece of text with the module-level OpenAI client.

    Sends a system instruction naming the target language plus the text as
    the user message to the "gpt-4o-mini" chat model.

    Args:
        text (str): Text to translate.
        target_language (str): Language name inserted into the system prompt.

    Returns:
        The message content of the first choice in the completion response.
    """
    system_prompt = f"You are a professional translator. Translate the following text to {target_language}. Preserve the original formatting and line breaks."
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": text},
    ]
    completion = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=conversation,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content
def get_base_filename(input_file):
    """Return the file name of ``input_file`` without directory or last extension.

    Args:
        input_file (str): Path to a file.

    Returns:
        str: The final path component with its last extension removed.
    """
    file_name = os.path.basename(input_file)
    stem, _extension = os.path.splitext(file_name)
    return stem
def create_output_directory(output_dir):
    """Ensure ``output_dir`` exists, creating missing parent directories too.

    An already existing directory is not an error.

    Args:
        output_dir (str): Directory path to create.
    """
    os.makedirs(name=output_dir, exist_ok=True)
def parse_sbv_file(lines):
    """Split the lines of an SBV file into caption blocks.

    Walks the lines with ``parse_block``; positions that do not start a
    caption (no timestamp returned) are skipped.

    Args:
        lines (list[str]): Lines of the SBV file.

    Returns:
        list[dict]: One ``{'timestamp': ..., 'text': ...}`` dict per caption,
        in file order.
    """
    parsed = []
    position = 0
    total = len(lines)
    while position < total:
        stamp, caption, position = parse_block(lines, position)
        if not stamp:
            # Not a timestamp line; parse_block already moved past it.
            continue
        parsed.append({'timestamp': stamp, 'text': caption})
    return parsed
def parse_block(lines, i):
    """Try to parse one caption block starting at index ``i``.

    Args:
        lines (list[str]): Lines of the SBV file.
        i (int): Index of the line to inspect.

    Returns:
        tuple: ``(timestamp, text, next_index)`` when line ``i`` (stripped)
        is a ``start,end`` timestamp pair, where ``text`` and ``next_index``
        come from ``parse_text_lines``; otherwise ``(None, None, i + 1)``.
    """
    candidate = lines[i].strip()
    is_timestamp = re.match(r'^\d{1,2}:\d{2}:\d{2}\.\d{3},\d{1,2}:\d{2}:\d{2}\.\d{3}$', candidate)
    if not is_timestamp:
        return None, None, i + 1
    # The caption text starts on the line right after the timestamp.
    body, next_index = parse_text_lines(lines, i + 1)
    return candidate, body, next_index
def parse_text_lines(lines, i):
    """Collect the caption text that starts at index ``i``.

    Consumes consecutive non-blank lines as the caption text, then skips the
    blank lines that follow.

    Args:
        lines (list[str]): Lines of the SBV file.
        i (int): Index of the first candidate text line.

    Returns:
        tuple[str, int]: The collected lines joined and stripped of
        surrounding whitespace, and the index of the first line after the
        trailing blank lines.
    """
    total = len(lines)
    start = i
    # Advance to the first blank line (or the end of the input).
    while i < total and lines[i].strip():
        i += 1
    collected = lines[start:i]
    # Swallow the blank separator lines that follow the text.
    while i < total and not lines[i].strip():
        i += 1
    return ''.join(collected).strip(), i
def translate_blocks(blocks, target_language):
    """Translate every caption block, showing a tqdm progress bar.

    Args:
        blocks (list[dict]): Caption blocks to translate.
        target_language (str): Target language passed to ``translate_block``.

    Returns:
        list[dict]: The results of ``translate_block``, in input order.
    """
    progress = tqdm(blocks, desc=f"Translating blocks for {target_language}")
    translated = []
    for entry in progress:
        translated.append(translate_block(entry, target_language))
    return translated
def translate_block(block, target_language):
    """Translate the text of one caption block, keeping its timestamp.

    Args:
        block (dict): Caption with ``'timestamp'`` and ``'text'`` keys.
        target_language (str): Target language passed to ``translate_text``.

    Returns:
        dict: New dict with the same timestamp and the translated text; a
        block whose text is empty gets ``''`` without calling the translator.
    """
    source_text = block['text']
    if source_text:
        rendered = translate_text(source_text, target_language)
    else:
        rendered = ''
    return {'timestamp': block['timestamp'], 'text': rendered}
def reconstruct_sbv_content(blocks):
    """Render caption blocks back into the lines of an SBV file.

    Each block becomes its timestamp line, its text (if any) and one blank
    separator line.

    Args:
        blocks (list[dict]): Captions with ``'timestamp'`` and ``'text'`` keys.

    Returns:
        list[str]: Newline-terminated strings ready for ``writelines``.
    """
    output_lines = []
    for block in blocks:
        output_lines.append(block['timestamp'] + '\n')
        text = block['text'] or ''
        # A blank line ends a caption in SBV, so blank lines inside the text
        # (for example, added by the translator) would cut the caption short
        # and orphan the rest of it. Keep only lines with visible content.
        kept = [line for line in text.split('\n') if line.strip()]
        if kept:
            output_lines.append('\n'.join(kept) + '\n')
        output_lines.append('\n')
    return output_lines
def get_output_file_path(output_dir, base_filename, lang):
    """Build the path of the translated file for one language.

    Args:
        output_dir (str): Directory that will hold the file.
        base_filename (str): Input file name without extension.
        lang (str): Language name appended to the file name.

    Returns:
        str: ``<output_dir>/<base_filename>_<lang>.sbv``.
    """
    file_name = f"{base_filename}_{lang}.sbv"
    return os.path.join(output_dir, file_name)
def translate_to_language(input_file, output_dir, language):
    """
    Translate one SBV subtitle file into a single target language.

    Reads and parses the input, translates each caption block, and writes
    the result to ``<output_dir>/<input base name>_<language>.sbv``.

    Args:
        input_file (str): Path to the input SBV file.
        output_dir (str): Directory where the translated file is saved;
            created if missing.
        language (str): Target language for translation.

    Returns:
        str: Path to the translated output file.
    """
    source_lines = read_sbv_file(input_file)
    stem = get_base_filename(input_file)
    create_output_directory(output_dir)
    caption_blocks = parse_sbv_file(source_lines)

    print(f"\nStarting translation for language: {language}")
    rendered_lines = reconstruct_sbv_content(translate_blocks(caption_blocks, language))

    destination = get_output_file_path(output_dir, stem, language)
    write_sbv_file(destination, rendered_lines)
    print(f"Translated {language} file saved: {destination}")
    return destination
def translate_to_languages(input_file, output_dir, selected_languages):
    """
    Translate one SBV subtitle file into several languages, one after another.

    Args:
        input_file (str): Path to the input SBV file.
        output_dir (str): Directory where the translated files are saved;
            created if missing.
        selected_languages (list): Target languages for translation.

    Returns:
        list: Paths to the translated output files, in the order of
        ``selected_languages``.
    """
    create_output_directory(output_dir)
    progress = tqdm(selected_languages, desc="Translating languages")
    return [translate_to_language(input_file, output_dir, name) for name in progress]
def main():
    """Command-line entry point: parse arguments and translate the SBV file.

    Positional arguments are the input ``.sbv`` file and the output
    directory. ``--languages`` restricts the target languages and defaults
    to every entry of the module-level ``languages`` list.
    """
    parser = argparse.ArgumentParser(description="Translate SBV subtitle files to multiple languages.")
    parser.add_argument("input_file", help="Path to the input .sbv file")
    parser.add_argument("output_dir", help="Path to the output directory")
    parser.add_argument("--languages", nargs="+", choices=languages, default=languages, help="Languages to translate to")
    args = parser.parse_args()

    translated_files = translate_to_languages(args.input_file, args.output_dir, args.languages)
    print(f"\nTranslation completed. {len(translated_files)} files created.")


# Run the translation only when the file is executed as a script.
if __name__ == "__main__":
    main()