"""Encode a single language instruction with a T5 encoder and save it to disk.

Edit ``TASK_NAME`` and ``INSTRUCTION`` below, then run this file as a script.
The encoder's last hidden state for the instruction is written to
``SAVE_DIR/<TASK_NAME>.pt`` together with the task name and instruction text.
"""

import os

import torch
import yaml

from models.multimodal_encoder.t5_encoder import T5Embedder


GPU = 0
MODEL_PATH = "google/t5-v1_1-xxl"
CONFIG_PATH = "configs/base.yaml"
SAVE_DIR = "lang_embed/"

# Modify this to your task name and instruction
TASK_NAME = "anubis_carrot_to_bag"
# Other example instructions:
# INSTRUCTION = "take the towel off the kirby doll"
# INSTRUCTION = "insert the brush to the dustpan"
INSTRUCTION = "pick up the carrot and put into the bag"

# Note: if your GPU VRAM is less than 24GB,
# it is recommended to enable offloading by specifying an offload directory.
# To do so, uncomment OFFLOAD_DIR here and `use_offload_folder` in main().
# OFFLOAD_DIR = '/home/jellyho/OFFLOAD'  # Specify your offload directory here, ensuring the directory exists.


def main():
    """Encode ``INSTRUCTION`` and save the result under ``SAVE_DIR``.

    Reads ``dataset.tokenizer_max_length`` from the YAML file at
    ``CONFIG_PATH``, builds a ``T5Embedder`` on ``cuda:GPU``, runs the
    tokenized instruction through the encoder, and stores a dictionary with
    the keys ``"name"``, ``"instruction"`` and ``"embeddings"`` via
    ``torch.save``.
    """
    with open(CONFIG_PATH, "r", encoding="utf-8") as fp:
        config = yaml.safe_load(fp)

    # Create the output directory before the expensive model load so a
    # missing folder cannot make the final torch.save fail.
    os.makedirs(SAVE_DIR, exist_ok=True)

    device = torch.device(f"cuda:{GPU}")
    text_embedder = T5Embedder(
        from_pretrained=MODEL_PATH,
        model_max_length=config["dataset"]["tokenizer_max_length"],
        device=device,
        # use_offload_folder=OFFLOAD_DIR
    )
    tokenizer, text_encoder = text_embedder.tokenizer, text_embedder.model

    tokens = tokenizer(
        INSTRUCTION,
        return_tensors="pt",
        padding="longest",
        truncation=True,
    )["input_ids"].to(device)

    # Force a single-row batch layout: (1, number_of_tokens).
    tokens = tokens.view(1, -1)
    with torch.no_grad():
        # Move the result to the CPU so the saved file does not reference
        # a CUDA device.
        pred = text_encoder(tokens).last_hidden_state.detach().cpu()

    save_path = os.path.join(SAVE_DIR, f"{TASK_NAME}.pt")
    # We save the embeddings in a dictionary format
    torch.save(
        {
            "name": TASK_NAME,
            "instruction": INSTRUCTION,
            "embeddings": pred,
        },
        save_path,
    )

    print(f'\"{INSTRUCTION}\" from \"{TASK_NAME}\" is encoded by \"{MODEL_PATH}\" into shape {pred.shape} and saved to \"{save_path}\"')


if __name__ == "__main__":
    main()