NLLB 600M Tibetan

State of the art

import functools

import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline

# Human-readable language names mapped to the flores codes that are passed to
# the translation pipeline as src_lang / tgt_lang.
_FLORES_CODES = {
    "Standard Tibetan": "bod_Tibt",
    "English": "eng_Latn",
}


@functools.lru_cache(maxsize=2)
def _load_model_and_tokenizer(model_name):
    """Load the seq2seq model and tokenizer, memoized per ``model_name``.

    Loading checkpoints is by far the most expensive step of a translation,
    so the loaded objects are kept and reused across calls instead of being
    re-created every time ``translate`` runs.
    """
    model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
    # NOTE: the tokenizer is always taken from the base NLLB checkpoint,
    # regardless of model_name.
    tokenizer = AutoTokenizer.from_pretrained("facebook/nllb-200-distilled-600M")
    return model, tokenizer


def translate(text, source_lang: str, target_lang: str,
              model_name: str = "TenzinGayche/nllb_600M_bi_boen_3"):
    """Translate ``text`` from ``source_lang`` to ``target_lang``.

    Args:
        text: Input passed to the translation pipeline. Only the first
            result of the pipeline output is returned.
        source_lang: Language name of the input; must be a key of
            ``_FLORES_CODES`` ("Standard Tibetan" or "English").
        target_lang: Language name of the output; same accepted values as
            ``source_lang``.
        model_name: Checkpoint identifier handed to
            ``AutoModelForSeq2SeqLM.from_pretrained``.

    Returns:
        The ``'translation_text'`` field of the first pipeline result.

    Raises:
        KeyError: If ``source_lang`` or ``target_lang`` is not a supported
            language name.
    """
    # Validate language names before any expensive model loading happens.
    try:
        source = _FLORES_CODES[source_lang]
        target = _FLORES_CODES[target_lang]
    except KeyError as err:
        raise KeyError(
            f"Unsupported language {err.args[0]!r}; "
            f"expected one of {sorted(_FLORES_CODES)}"
        ) from None

    # Reuse the already-loaded model/tokenizer when available.
    model, tokenizer = _load_model_and_tokenizer(model_name)

    # Pipeline device convention: 0 selects the first GPU, -1 selects the CPU.
    device = 0 if torch.cuda.is_available() else -1

    translator = pipeline(
        'translation',
        model=model,
        tokenizer=tokenizer,
        src_lang=source,
        tgt_lang=target,
        device=device,
    )

    output = translator(text, max_length=400)

    return output[0]['translation_text']

# Demo: translate one English sentence into Standard Tibetan and show both.
if __name__ == "__main__":
    sample = "Hello, how are you?"
    translation = translate(sample, "English", "Standard Tibetan")

    print(f"Original: {sample}")
    print(f"Translated: {translation}")
Downloads last month
188
Safetensors
Model size
615M params
Tensor type
F32
·
Inference Providers NEW
This model is not currently available via any of the supported Inference Providers.

Model tree for TenzinGayche/nllb_600M_bi_boen_3

Finetuned
(104)
this model

Space using TenzinGayche/nllb_600M_bi_boen_3 1