https://huggingface.co/cross-encoder/ms-marco-MiniLM-L-4-v2 optimized with ONNX Runtime (Optimum optimization level O4).

Optimize the model:

```python
from pathlib import Path

from optimum.onnxruntime import (
    AutoOptimizationConfig,
    ORTModelForSequenceClassification,
    ORTOptimizer,
)
from transformers import AutoTokenizer

model = "cross-encoder/ms-marco-MiniLM-L-4-v2"

save_dir = Path("/tmp/optimized_models")
save_dir.mkdir(exist_ok=True, parents=True)

# Export the PyTorch checkpoint to ONNX on the fly.
tokenizer = AutoTokenizer.from_pretrained(model)
ort_model = ORTModelForSequenceClassification.from_pretrained(model, export=True)

# NOTE: per the Optimum docs, O4 adds fp16 mixed precision on top of the O3
# graph optimizations and is intended for GPU inference.
optimizer = ORTOptimizer.from_pretrained(ort_model)
optimizer.optimize(
    optimization_config=AutoOptimizationConfig.O4(),
    save_dir=save_dir,
)

# Keep the tokenizer next to the optimized graph so that save_dir can be
# loaded on its own by the inference snippet below.
tokenizer.save_pretrained(save_dir)
```

Run it with ONNX Runtime:

```python
import torch
from optimum.onnxruntime import ORTModelForSequenceClassification
from optimum.pipelines import pipeline as ort_pipeline
from transformers import AutoTokenizer
from transformers.pipelines.text_classification import ClassificationFunction

# Directory written by the optimization step above. A Hub repo id also works
# here, provided that repo actually contains model_optimized.onnx.
model_dir = "/tmp/optimized_models"

# Pipeline device convention: GPU index when CUDA is available, -1 for CPU.
# NOTE(review): an O4 (fp16) model is meant for GPU; the CPU fallback is kept
# from the original snippet but may be slow or unsupported - confirm.
device = 0 if torch.cuda.is_available() else -1

tokenizer = AutoTokenizer.from_pretrained(model_dir)
ort_model = ORTModelForSequenceClassification.from_pretrained(
    model_dir,
    file_name="model_optimized.onnx",
)

# The cross-encoder emits a single relevance logit per (query, passage) pair;
# SIGMOID maps it to a score in [0, 1].
cross_encoder = ort_pipeline(
    task="text-classification",
    model=ort_model,
    tokenizer=tokenizer,
    device=device,
    function_to_apply=ClassificationFunction.SIGMOID,
    padding=True,
    truncation=True,
)

cross_encoder([{
    "text": "What is the purpose of life?",
    "text_pair": "The purpose of life is subjective and determined by each individual. Some may believe the purpose of life is to seek knowledge and education, to find happiness and fulfillment, or to live with purpose by helping others.",
}])
```