jpohhhh
/

embeddings_from_msmarco-MiniLM-L-6-v3

Feature Extraction

text-embeddings-inference

Inference Endpoints

Model card Files Files and versions Community

jpohhhh committed on Jun 21, 2023

Commit

917cd83

·

1 Parent(s): 8751910

Update handler.py

Files changed (1) hide show

handler.py +5 -17

handler.py CHANGED Viewed

@@ -3,22 +3,10 @@ from transformers import AutoTokenizer, AutoModel
 import torch
 #Mean Pooling - Take attention mask into account for correct averaging
-def max_pooling(model_output):
-    # Get dimensions
-    Z, Y = len(model_output[0]), len(model_output[0][0])
-    # Initialize an empty list with length Y (384 in your case)
-    output_array = [0] * Y
-    # Loop over secondary arrays (Z)
-    for i in range(Z):
-        # Loop over values in innermost arrays (Y)
-        for j in range(Y):
-            # If value is greater than current max, update max
-            if model_output[0][i][j] > output_array[j]:
-                output_array[j] = model_output[0][i][j]
-    return output_array
 class EndpointHandler():
     def __init__(self, path=""):
@@ -44,5 +32,5 @@ class EndpointHandler():
             model_output = self.model(**encoded_input)
         # Perform pooling. In this case, max pooling.
-        sentence_embeddings = max_pooling(model_output, encoded_input['attention_mask'])
         return sentence_embeddings.tolist()

 import torch
 #Mean Pooling - Take attention mask into account for correct averaging
+def mean_pooling(model_output, attention_mask):
+    token_embeddings = model_output[0] #First element of model_output contains all token embeddings
+    input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
+    return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(input_mask_expanded.sum(1), min=1e-9)
 class EndpointHandler():
     def __init__(self, path=""):
             model_output = self.model(**encoded_input)
         # Perform pooling. In this case, mean pooling.
+        sentence_embeddings = mean_pooling(model_output, encoded_input['attention_mask'])
         return sentence_embeddings.tolist()