from typing import List, Tuple

import torch
import torch.nn.functional as F
from transformers import AutoConfig, AutoTokenizer

from model import Easyrec


def load_model(model_path: str) -> Tuple[Easyrec, AutoTokenizer]:
    """
    Load the pre-trained model and tokenizer from the specified path.

    Args:
        model_path: The path to the pre-trained huggingface model or local directory.

    Returns:
        tuple: A tuple containing the model and tokenizer.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    config = AutoConfig.from_pretrained(model_path)
    model = Easyrec.from_pretrained(model_path, config=config).to(device)
    tokenizer = AutoTokenizer.from_pretrained(model_path, use_fast=False)
    return model, tokenizer


def compute_embeddings(
        sentences: List[str],
        model: Easyrec,
        tokenizer: AutoTokenizer,
        batch_size: int = 8) -> torch.Tensor:
    """
    Compute embeddings for a list of sentences using the specified model and tokenizer.

    Args:
        sentences: A list of sentences for which to compute embeddings.
        model: The pre-trained model used for generating embeddings.
        tokenizer: The tokenizer used to preprocess the sentences.
        batch_size: The number of sentences to process in each batch (default is 8).

    Returns:
        torch.Tensor: A tensor containing the normalized embeddings for the input sentences.
    """
    embeddings = []
    count_sentences = len(sentences)
    device = next(model.parameters()).device  # Get the device on which the model is located
    for start in range(0, count_sentences, batch_size):
        end = start + batch_size
        batch_sentences = sentences[start:end]
        inputs = tokenizer(
            batch_sentences,
            padding=True,
            truncation=True,
            max_length=512,
            return_tensors="pt",
        )
        # Move input tensors to the same device as the model
        inputs = {key: val.to(device) for key, val in inputs.items()}
        with torch.inference_mode():
            outputs = model.encode(inputs['input_ids'], inputs['attention_mask'])
        batch_embeddings = F.normalize(outputs.pooler_output.detach().float(), dim=-1)
        embeddings.append(batch_embeddings.cpu())
    return torch.cat(embeddings, dim=0)  # Concatenate all computed embeddings into a single tensor
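

# Minimal usage sketch (illustrative only): the checkpoint path and sentences
# below are hypothetical placeholders, not values taken from the original code.
if __name__ == '__main__':
    # Assumes a local checkpoint directory or HuggingFace repo id is available.
    model, tokenizer = load_model('path/to/easyrec-checkpoint')
    sample_sentences = [
        'A user who enjoys science fiction novels and space documentaries.',
        'An item described as a hard sci-fi book about interstellar travel.',
    ]
    embeddings = compute_embeddings(sample_sentences, model, tokenizer, batch_size=2)
    # Embeddings are L2-normalized, so the dot product equals cosine similarity.
    similarity = embeddings[0] @ embeddings[1]
    print(f'Embedding shape: {tuple(embeddings.shape)}, similarity: {similarity.item():.4f}')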