# Run with 'python -m unittest tests.test_translation'

import unittest
import tempfile
import os

import transformers

from translate import main


class Inputs(unittest.TestCase):
    def test_m2m100_inputs(self):
        # Create a temporary directory
        with tempfile.TemporaryDirectory() as tmpdirname:
            # Create a temporary file
            input_path = os.path.join(tmpdirname, "source.txt")
            output_path = os.path.join(tmpdirname, "target.txt")
            with open(
                os.path.join(tmpdirname, "source.txt"), "w", encoding="utf8"
            ) as f:
                print("Hello, world, my name is Iker!", file=f)
            main(
                sentences_path=input_path,
                sentences_dir=None,
                files_extension="txt",
                output_path=output_path,
                source_lang="en",
                target_lang="es",
                starting_batch_size=32,
                model_name="facebook/m2m100_418M",
                lora_weights_name_or_path=None,
                force_auto_device_map=True,
                precision=None,
                max_length=64,
                num_beams=2,
                num_return_sequences=1,
                do_sample=False,
                temperature=1.0,
                top_k=50,
                top_p=1.0,
                keep_special_tokens=False,
                keep_tokenization_spaces=False,
                repetition_penalty=None,
                prompt=None,
            )
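
            # Translate every ".txt" file in a directory given via sentences_dir.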
            main(
                sentences_path=None,
                sentences_dir=tmpdirname,
                files_extension="txt",
                output_path=os.path.join(tmpdirname, "target"),
                source_lang="en",
                target_lang="es",
                starting_batch_size=32,
                model_name="facebook/m2m100_418M",
                lora_weights_name_or_path=None,
                force_auto_device_map=True,
                precision=None,
                max_length=64,
                num_beams=2,
                num_return_sequences=1,
                do_sample=False,
                temperature=1.0,
                top_k=50,
                top_p=1.0,
                keep_special_tokens=False,
                keep_tokenization_spaces=False,
                repetition_penalty=None,
                prompt=None,
            )


class Translations(unittest.TestCase):
    def test_m2m100(self):
        # Create a temporary directory
        with tempfile.TemporaryDirectory() as tmpdirname:
            # Create a temporary file
            input_path = os.path.join(tmpdirname, "source.txt")
            output_path = os.path.join(tmpdirname, "target.txt")
            with open(
                os.path.join(tmpdirname, "source.txt"), "w", encoding="utf8"
            ) as f:
                print("Hello, world, my name is Iker!", file=f)

            model_name = "facebook/m2m100_418M"
            src_lang = "en"
            tgt_lang = "es"
            main(
                sentences_path=input_path,
                sentences_dir=None,
                files_extension="txt",
                output_path=output_path,
                source_lang=src_lang,
                target_lang=tgt_lang,
                starting_batch_size=32,
                model_name=model_name,
                lora_weights_name_or_path=None,
                force_auto_device_map=True,
                precision="bf16",
                max_length=64,
                num_beams=2,
                num_return_sequences=1,
                do_sample=False,
                temperature=1.0,
                top_k=50,
                top_p=1.0,
                keep_special_tokens=False,
                keep_tokenization_spaces=False,
                repetition_penalty=None,
                prompt=None,
            )
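
            # Repeat the same translation with precision "4" (4-bit quantization).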
            main(
                sentences_path=input_path,
                sentences_dir=None,
                files_extension="txt",
                output_path=output_path,
                source_lang=src_lang,
                target_lang=tgt_lang,
                starting_batch_size=32,
                model_name=model_name,
                lora_weights_name_or_path=None,
                force_auto_device_map=True,
                precision="4",
                max_length=64,
                num_beams=2,
                num_return_sequences=1,
                do_sample=False,
                temperature=1.0,
                top_k=50,
                top_p=1.0,
                keep_special_tokens=False,
                keep_tokenization_spaces=False,
                repetition_penalty=None,
                prompt=None,
            )

    def test_nllb200(self):
        # Create a temporary directory
        with tempfile.TemporaryDirectory() as tmpdirname:
            # Create a temporary file
            input_path = os.path.join(tmpdirname, "source.txt")
            output_path = os.path.join(tmpdirname, "target.txt")
            with open(
                os.path.join(tmpdirname, "source.txt"), "w", encoding="utf8"
            ) as f:
                print("Hello, world, my name is Iker!", file=f)

            model_name = "facebook/nllb-200-distilled-600M"
            src_lang = "eng_Latn"
            tgt_lang = "spa_Latn"
            main(
                sentences_path=input_path,
                sentences_dir=None,
                files_extension="txt",
                output_path=output_path,
                source_lang=src_lang,
                target_lang=tgt_lang,
                starting_batch_size=32,
                model_name=model_name,
                lora_weights_name_or_path=None,
                force_auto_device_map=True,
                precision="bf16",
                max_length=64,
                num_beams=2,
                num_return_sequences=1,
                do_sample=False,
                temperature=1.0,
                top_k=50,
                top_p=1.0,
                keep_special_tokens=False,
                keep_tokenization_spaces=False,
                repetition_penalty=None,
                prompt=None,
            )
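
            # Repeat with precision "4" (4-bit quantization).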
            main(
                sentences_path=input_path,
                sentences_dir=None,
                files_extension="txt",
                output_path=output_path,
                source_lang=src_lang,
                target_lang=tgt_lang,
                starting_batch_size=32,
                model_name=model_name,
                lora_weights_name_or_path=None,
                force_auto_device_map=True,
                precision="4",
                max_length=64,
                num_beams=2,
                num_return_sequences=1,
                do_sample=False,
                temperature=1.0,
                top_k=50,
                top_p=1.0,
                keep_special_tokens=False,
                keep_tokenization_spaces=False,
                repetition_penalty=None,
                prompt=None,
            )

    def test_mbart(self):
        # Create a temporary directory
        with tempfile.TemporaryDirectory() as tmpdirname:
            # Create a temporary file
            input_path = os.path.join(tmpdirname, "source.txt")
            output_path = os.path.join(tmpdirname, "target.txt")
            with open(
                os.path.join(tmpdirname, "source.txt"), "w", encoding="utf8"
            ) as f:
                print("Hello, world, my name is Iker!", file=f)

            model_name = "facebook/mbart-large-50"
            src_lang = "en_XX"
            tgt_lang = "es_XX"
            main(
                sentences_path=input_path,
                sentences_dir=None,
                files_extension="txt",
                output_path=output_path,
                source_lang=src_lang,
                target_lang=tgt_lang,
                starting_batch_size=32,
                model_name=model_name,
                lora_weights_name_or_path=None,
                force_auto_device_map=True,
                precision="bf16",
                max_length=64,
                num_beams=2,
                num_return_sequences=1,
                do_sample=False,
                temperature=1.0,
                top_k=50,
                top_p=1.0,
                keep_special_tokens=False,
                keep_tokenization_spaces=False,
                repetition_penalty=None,
                prompt=None,
            )
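
            # Repeat with precision "4" (4-bit quantization).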
            main(
                sentences_path=input_path,
                sentences_dir=None,
                files_extension="txt",
                output_path=output_path,
                source_lang=src_lang,
                target_lang=tgt_lang,
                starting_batch_size=32,
                model_name=model_name,
                lora_weights_name_or_path=None,
                force_auto_device_map=True,
                precision="4",
                max_length=64,
                num_beams=2,
                num_return_sequences=1,
                do_sample=False,
                temperature=1.0,
                top_k=50,
                top_p=1.0,
                keep_special_tokens=False,
                keep_tokenization_spaces=False,
                repetition_penalty=None,
                prompt=None,
            )

    def test_opus(self):
        # Create a temporary directory
        with tempfile.TemporaryDirectory() as tmpdirname:
            # Create a temporary file
            input_path = os.path.join(tmpdirname, "source.txt")
            output_path = os.path.join(tmpdirname, "target.txt")
            with open(
                os.path.join(tmpdirname, "source.txt"), "w", encoding="utf8"
            ) as f:
                print("Hello, world, my name is Iker!", file=f)

            model_name = "Helsinki-NLP/opus-mt-en-es"
            src_lang = None
            tgt_lang = None
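
            # OPUS-MT en-es: the checkpoint fixes the language pair, so no
            # language codes are passed. Run in bf16 precision.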
            main(
                sentences_path=input_path,
                sentences_dir=None,
                files_extension="txt",
                output_path=output_path,
                source_lang=src_lang,
                target_lang=tgt_lang,
                starting_batch_size=32,
                model_name=model_name,
                lora_weights_name_or_path=None,
                force_auto_device_map=False,
                precision="bf16",
                max_length=64,
                num_beams=2,
                num_return_sequences=1,
                do_sample=False,
                temperature=1.0,
                top_k=50,
                top_p=1.0,
                keep_special_tokens=False,
                keep_tokenization_spaces=False,
                repetition_penalty=None,
                prompt=None,
            )
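
            # Repeat with precision "4" (4-bit quantization).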
            main(
                sentences_path=input_path,
                sentences_dir=None,
                files_extension="txt",
                output_path=output_path,
                source_lang=src_lang,
                target_lang=tgt_lang,
                starting_batch_size=32,
                model_name=model_name,
                lora_weights_name_or_path=None,
                force_auto_device_map=False,
                precision="4",
                max_length=64,
                num_beams=2,
                num_return_sequences=1,
                do_sample=False,
                temperature=1.0,
                top_k=50,
                top_p=1.0,
                keep_special_tokens=False,
                keep_tokenization_spaces=False,
                repetition_penalty=None,
                prompt=None,
            )

    def test_small100(self):
        # Create a temporary directory
        with tempfile.TemporaryDirectory() as tmpdirname:
            # Create a temporary file
            input_path = os.path.join(tmpdirname, "source.txt")
            output_path = os.path.join(tmpdirname, "target.txt")
            with open(
                os.path.join(tmpdirname, "source.txt"), "w", encoding="utf8"
            ) as f:
                print("Hello, world, my name is Iker!", file=f)

            model_name = "alirezamsh/small100"
            src_lang = None
            tgt_lang = "es"
            main(
                sentences_path=input_path,
                sentences_dir=None,
                files_extension="txt",
                output_path=output_path,
                source_lang=src_lang,
                target_lang=tgt_lang,
                starting_batch_size=32,
                model_name=model_name,
                lora_weights_name_or_path=None,
                force_auto_device_map=True,
                precision="bf16",
                max_length=64,
                num_beams=2,
                num_return_sequences=1,
                do_sample=False,
                temperature=1.0,
                top_k=50,
                top_p=1.0,
                keep_special_tokens=False,
                keep_tokenization_spaces=False,
                repetition_penalty=None,
                prompt=None,
            )
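
            # Repeat with precision "4" (4-bit quantization).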
            main(
                sentences_path=input_path,
                sentences_dir=None,
                files_extension="txt",
                output_path=output_path,
                source_lang=src_lang,
                target_lang=tgt_lang,
                starting_batch_size=32,
                model_name=model_name,
                lora_weights_name_or_path=None,
                force_auto_device_map=True,
                precision="4",
                max_length=64,
                num_beams=2,
                num_return_sequences=1,
                do_sample=False,
                temperature=1.0,
                top_k=50,
                top_p=1.0,
                keep_special_tokens=False,
                keep_tokenization_spaces=False,
                repetition_penalty=None,
                prompt=None,
            )

    def test_seamless(self):
        # Create a temporary directory
        with tempfile.TemporaryDirectory() as tmpdirname:
            # Create a temporary file
            input_path = os.path.join(tmpdirname, "source.txt")
            output_path = os.path.join(tmpdirname, "target.txt")
            with open(
                os.path.join(tmpdirname, "source.txt"), "w", encoding="utf8"
            ) as f:
                print("Hello, world, my name is Iker!", file=f)

            model_name = "facebook/hf-seamless-m4t-medium"
            src_lang = "eng"
            tgt_lang = "spa"
            main(
                sentences_path=input_path,
                sentences_dir=None,
                files_extension="txt",
                output_path=output_path,
                source_lang=src_lang,
                target_lang=tgt_lang,
                starting_batch_size=32,
                model_name=model_name,
                lora_weights_name_or_path=None,
                force_auto_device_map=True,
                precision="bf16",
                max_length=64,
                num_beams=2,
                num_return_sequences=1,
                do_sample=False,
                temperature=1.0,
                top_k=50,
                top_p=1.0,
                keep_special_tokens=False,
                keep_tokenization_spaces=False,
                repetition_penalty=None,
                prompt=None,
            )
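
            # Repeat with precision "4" (4-bit quantization).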
            main(
                sentences_path=input_path,
                sentences_dir=None,
                files_extension="txt",
                output_path=output_path,
                source_lang=src_lang,
                target_lang=tgt_lang,
                starting_batch_size=32,
                model_name=model_name,
                lora_weights_name_or_path=None,
                force_auto_device_map=True,
                precision="4",
                max_length=64,
                num_beams=2,
                num_return_sequences=1,
                do_sample=False,
                temperature=1.0,
                top_k=50,
                top_p=1.0,
                keep_special_tokens=False,
                keep_tokenization_spaces=False,
                repetition_penalty=None,
                prompt=None,
            )


class Prompting(unittest.TestCase):
    def test_llama(self):
        # Create a temporary directory
        with tempfile.TemporaryDirectory() as tmpdirname:
            # Create a temporary file
            input_path = os.path.join(tmpdirname, "source.txt")
            output_path = os.path.join(tmpdirname, "target.txt")
            with open(
                os.path.join(tmpdirname, "source.txt"), "w", encoding="utf8"
            ) as f:
                print("Hello, world, my name is Iker!", file=f)

            model_name = "stas/tiny-random-llama-2"
            prompt = "Translate English to Spanish: %%SENTENCE%%"
            main(
                sentences_path=input_path,
                sentences_dir=None,
                files_extension="txt",
                output_path=output_path,
                source_lang=None,
                target_lang=None,
                starting_batch_size=32,
                model_name=model_name,
                lora_weights_name_or_path=None,
                force_auto_device_map=True,
                precision="bf16",
                max_length=64,
                num_beams=2,
                num_return_sequences=1,
                do_sample=True,
                temperature=1.0,
                top_k=50,
                top_p=1.0,
                keep_special_tokens=False,
                keep_tokenization_spaces=False,
                repetition_penalty=None,
                prompt=prompt,
            )
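
            # Repeat the prompted run with precision "4" (4-bit quantization).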
            main(
                sentences_path=input_path,
                sentences_dir=None,
                files_extension="txt",
                output_path=output_path,
                source_lang=None,
                target_lang=None,
                starting_batch_size=32,
                model_name=model_name,
                lora_weights_name_or_path=None,
                force_auto_device_map=True,
                precision="4",
                max_length=64,
                num_beams=2,
                num_return_sequences=1,
                do_sample=True,
                temperature=1.0,
                top_k=50,
                top_p=1.0,
                keep_special_tokens=False,
                keep_tokenization_spaces=False,
                repetition_penalty=None,
                prompt=prompt,
            )