Tom Aarsen
		
	committed on
		
		
					Commit 
							
							·
						
						3dad7c2
	
1
								Parent(s):
							
							5a14b88
								
Add Sentence Transformers integration + README
Browse files
 - 1_Pooling/config.json +10 -0
 - README.md +33 -0
 - config_sentence_transformers.json +11 -0
 - modules.json +20 -0
 - sentence_bert_config.json +4 -0
 
    	
        1_Pooling/config.json
    ADDED
    
    | 
         @@ -0,0 +1,10 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            {
         
     | 
| 2 | 
         
            +
              "word_embedding_dimension": 768,
         
     | 
| 3 | 
         
            +
              "pooling_mode_cls_token": true,
         
     | 
| 4 | 
         
            +
              "pooling_mode_mean_tokens": false,
         
     | 
| 5 | 
         
            +
              "pooling_mode_max_tokens": false,
         
     | 
| 6 | 
         
            +
              "pooling_mode_mean_sqrt_len_tokens": false,
         
     | 
| 7 | 
         
            +
              "pooling_mode_weightedmean_tokens": false,
         
     | 
| 8 | 
         
            +
              "pooling_mode_lasttoken": false,
         
     | 
| 9 | 
         
            +
              "include_prompt": true
         
     | 
| 10 | 
         
            +
            }
         
     | 
    	
        README.md
    CHANGED
    
    | 
         @@ -2936,6 +2936,39 @@ Based on the [intfloat/e5-large-unsupervised](https://huggingface.co/intfloat/e5 
     | 
|
| 2936 | 
         
             
            ## Usage
         
     | 
| 2937 | 
         | 
| 2938 | 
         | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 2939 | 
         
             
            ### Using Huggingface transformers
         
     | 
| 2940 | 
         | 
| 2941 | 
         | 
| 
         | 
|
| 2936 | 
         
             
            ## Usage
         
     | 
| 2937 | 
         | 
| 2938 | 
         | 
| 2939 | 
         
            +
            ### Using Sentence Transformers
         
     | 
| 2940 | 
         
            +
             
     | 
| 2941 | 
         
            +
            You can use the sentence-transformers package to use a snowflake-arctic-embed model, as shown below. 
         
     | 
| 2942 | 
         
            +
             
     | 
| 2943 | 
         
            +
            ```python
         
     | 
| 2944 | 
         
            +
            from sentence_transformers import SentenceTransformer
         
     | 
| 2945 | 
         
            +
             
     | 
| 2946 | 
         
            +
            model = SentenceTransformer("Snowflake/snowflake-arctic-embed-m")
         
     | 
| 2947 | 
         
            +
             
     | 
| 2948 | 
         
            +
            queries = ['what is snowflake?', 'Where can I get the best tacos?']
         
     | 
| 2949 | 
         
            +
            documents = ['The Data Cloud!', 'Mexico City of Course!']
         
     | 
| 2950 | 
         
            +
             
     | 
| 2951 | 
         
            +
            query_embeddings = model.encode(queries, prompt_name="query")
         
     | 
| 2952 | 
         
            +
            document_embeddings = model.encode(documents)
         
     | 
| 2953 | 
         
            +
             
     | 
| 2954 | 
         
            +
            scores = query_embeddings @ document_embeddings.T
         
     | 
| 2955 | 
         
            +
            for query, query_scores in zip(queries, scores):
         
     | 
| 2956 | 
         
            +
                doc_score_pairs = list(zip(documents, query_scores))
         
     | 
| 2957 | 
         
            +
                doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
         
     | 
| 2958 | 
         
            +
                # Output passages & scores
         
     | 
| 2959 | 
         
            +
                print("Query:", query)
         
     | 
| 2960 | 
         
            +
                for document, score in doc_score_pairs:
         
     | 
| 2961 | 
         
            +
                    print(score, document)
         
     | 
| 2962 | 
         
            +
            ```
         
     | 
| 2963 | 
         
            +
            ```
         
     | 
| 2964 | 
         
            +
            Query: what is snowflake?
         
     | 
| 2965 | 
         
            +
            0.20051965 The Data Cloud!
         
     | 
| 2966 | 
         
            +
            0.07660701 Mexico City of Course!
         
     | 
| 2967 | 
         
            +
            Query: Where can I get the best tacos?
         
     | 
| 2968 | 
         
            +
            0.24481852 Mexico City of Course!
         
     | 
| 2969 | 
         
            +
            0.15664819 The Data Cloud!
         
     | 
| 2970 | 
         
            +
            ```
         
     | 
| 2971 | 
         
            +
             
     | 
| 2972 | 
         
             
            ### Using Huggingface transformers
         
     | 
| 2973 | 
         | 
| 2974 | 
         | 
    	
        config_sentence_transformers.json
    ADDED
    
    | 
         @@ -0,0 +1,11 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            {
         
     | 
| 2 | 
         
            +
              "__version__": {
         
     | 
| 3 | 
         
            +
                "sentence_transformers": "2.7.0.dev0",
         
     | 
| 4 | 
         
            +
                "transformers": "4.39.3",
         
     | 
| 5 | 
         
            +
                "pytorch": "2.1.0+cu121"
         
     | 
| 6 | 
         
            +
              },
         
     | 
| 7 | 
         
            +
              "prompts": {
         
     | 
| 8 | 
         
            +
                "query": "Represent this sentence for searching relevant passages: "
         
     | 
| 9 | 
         
            +
              },
         
     | 
| 10 | 
         
            +
              "default_prompt_name": null
         
     | 
| 11 | 
         
            +
            }
         
     | 
    	
        modules.json
    ADDED
    
    | 
         @@ -0,0 +1,20 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            [
         
     | 
| 2 | 
         
            +
              {
         
     | 
| 3 | 
         
            +
                "idx": 0,
         
     | 
| 4 | 
         
            +
                "name": "0",
         
     | 
| 5 | 
         
            +
                "path": "",
         
     | 
| 6 | 
         
            +
                "type": "sentence_transformers.models.Transformer"
         
     | 
| 7 | 
         
            +
              },
         
     | 
| 8 | 
         
            +
              {
         
     | 
| 9 | 
         
            +
                "idx": 1,
         
     | 
| 10 | 
         
            +
                "name": "1",
         
     | 
| 11 | 
         
            +
                "path": "1_Pooling",
         
     | 
| 12 | 
         
            +
                "type": "sentence_transformers.models.Pooling"
         
     | 
| 13 | 
         
            +
              },
         
     | 
| 14 | 
         
            +
              {
         
     | 
| 15 | 
         
            +
                "idx": 2,
         
     | 
| 16 | 
         
            +
                "name": "2",
         
     | 
| 17 | 
         
            +
                "path": "2_Normalize",
         
     | 
| 18 | 
         
            +
                "type": "sentence_transformers.models.Normalize"
         
     | 
| 19 | 
         
            +
              }
         
     | 
| 20 | 
         
            +
            ]
         
     | 
    	
        sentence_bert_config.json
    ADDED
    
    | 
         @@ -0,0 +1,4 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            {
         
     | 
| 2 | 
         
            +
              "max_seq_length": 512,
         
     | 
| 3 | 
         
            +
              "do_lower_case": false
         
     | 
| 4 | 
         
            +
            }
         
     |