```python
import torch
from transformers import AutoTokenizer, AutoModel

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


def load_tokenizers_and_embeddings():
    # ===== Vietnamese PhoBERT =====
    tokenizer_vi = AutoTokenizer.from_pretrained("vinai/phobert-base")
    model_vi = AutoModel.from_pretrained("vinai/phobert-base").to(device)
    embedding_matrix_vi = model_vi.embeddings.word_embeddings.weight

    # ===== English BERT =====
    tokenizer_en = AutoTokenizer.from_pretrained("bert-base-cased-finetuned-mrpc")
    model_en = AutoModel.from_pretrained("bert-base-cased-finetuned-mrpc").to(device)
    embedding_matrix_en = model_en.embeddings.word_embeddings.weight

    return {
        "tokenizer_vi": tokenizer_vi,
        "embedding_vi": embedding_matrix_vi,
        "tokenizer_en": tokenizer_en,
        "embedding_en": embedding_matrix_en,
        "device": device,
    }
```
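As a minimal usage sketch (the example sentence and the variable names `resources`, `encoded`, and `token_embeddings` are illustrative, not from the original): the returned tokenizer converts a sentence to token ids, and those ids index directly into the embedding matrix to retrieve static input embeddings.

```python
# Illustrative usage sketch; assumes the block above has been run.
resources = load_tokenizers_and_embeddings()

tokenizer_vi = resources["tokenizer_vi"]
embedding_vi = resources["embedding_vi"]

# Tokenize a Vietnamese sentence (PhoBERT normally expects word-segmented input).
encoded = tokenizer_vi("Xin chào", return_tensors="pt").to(resources["device"])

# Look up the static input embeddings for each token id in the embedding matrix.
with torch.no_grad():
    token_embeddings = embedding_vi[encoded["input_ids"]]  # shape: (1, seq_len, 768)

print(token_embeddings.shape)
```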