nemabruh404's picture
Upload 6 files
f595d09 verified
raw
history blame
932 Bytes
import torch
from transformers import AutoTokenizer, AutoModel
# Run on the GPU when a CUDA device is usable; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
def _load_tokenizer_and_embedding(model_name: str):
    """Load one checkpoint and return ``(tokenizer, input_embedding_weight)``."""
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModel.from_pretrained(model_name).to(device)
    # get_input_embeddings() is the architecture-independent accessor for the
    # token-embedding layer; on BERT/RoBERTa models it returns the same module
    # as ``model.embeddings.word_embeddings``.
    return tokenizer, model.get_input_embeddings().weight


def load_tokenizers_and_embeddings(
    model_name_vi: str = "vinai/phobert-base",
    model_name_en: str = "bert-base-cased-finetuned-mrpc",
) -> dict:
    """Load the Vietnamese and English tokenizers and token-embedding matrices.

    Each checkpoint is loaded with ``AutoTokenizer`` / ``AutoModel``, the model
    is moved to the module-level ``device``, and the weight of its input
    token-embedding layer is extracted.  The weights are returned as-is (the
    model's own parameter tensors, not detached copies).

    Args:
        model_name_vi: Checkpoint name or path for the Vietnamese model.
            Defaults to PhoBERT base.
        model_name_en: Checkpoint name or path for the English model.
            Defaults to the cased BERT base checkpoint fine-tuned on MRPC.

    Returns:
        A dict with the keys ``"tokenizer_vi"``, ``"embedding_vi"``,
        ``"tokenizer_en"``, ``"embedding_en"`` and ``"device"``.
    """
    # ===== Vietnamese PhoBERT =====
    tokenizer_vi, embedding_matrix_vi = _load_tokenizer_and_embedding(model_name_vi)

    # ===== English BERT =====
    tokenizer_en, embedding_matrix_en = _load_tokenizer_and_embedding(model_name_en)

    return {
        "tokenizer_vi": tokenizer_vi,
        "embedding_vi": embedding_matrix_vi,
        "tokenizer_en": tokenizer_en,
        "embedding_en": embedding_matrix_en,
        "device": device,
    }