Kaguya-19 committed
Commit dc0f82b · verified · 1 parent: 3cc1148

Update README.md

Files changed (1):
  1. README.md +6 -3
README.md CHANGED
@@ -344,7 +344,6 @@ When running evaluation on BEIR and C-MTEB/Retrieval, we use instructions in `in
 
 ```
 transformers==4.37.2
-flash-attn>2.3.5
 ```
 
 ### 示例脚本 Demo
@@ -358,7 +357,9 @@ import torch.nn.functional as F
 
 model_name = "openbmb/MiniCPM-Embedding"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
-model = AutoModel.from_pretrained(model_name, trust_remote_code=True, attn_implementation="flash_attention_2", torch_dtype=torch.float16).to("cuda")
+model = AutoModel.from_pretrained(model_name, trust_remote_code=True, torch_dtype=torch.float16).to("cuda")
+# You can also use the following line to enable the Flash Attention 2 implementation
+# model = AutoModel.from_pretrained(model_name, trust_remote_code=True, attn_implementation="flash_attention_2", torch_dtype=torch.float16).to("cuda")
 model.eval()
 
 # 由于在 `model.forward` 中缩放了最终隐层表示,此处的 mean pooling 实际上起到了 weighted mean pooling 的作用
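For context, the Transformers-based snippet can be exercised roughly as follows after this change, with the Flash Attention 2 path left as an opt-in. This is a minimal sketch rather than the README's exact code: the `encode` helper, the pooling details, and the use of `last_hidden_state` from the `trust_remote_code` model are illustrative assumptions, and a CUDA device with fp16 support is assumed.

```python
# Minimal sketch (assumptions noted above); the default load path no longer requires flash-attn.
import torch
import torch.nn.functional as F
from transformers import AutoModel, AutoTokenizer

model_name = "openbmb/MiniCPM-Embedding"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name, trust_remote_code=True, torch_dtype=torch.float16).to("cuda")
model.eval()

@torch.no_grad()
def encode(texts):
    # Illustrative helper (not from the README): tokenize, run the model, pool, normalize.
    batch = tokenizer(texts, padding=True, truncation=True, return_tensors="pt").to("cuda")
    outputs = model(**batch)  # assumes the custom model exposes last_hidden_state
    # Because the final hidden states are already scaled inside model.forward,
    # plain mean pooling over non-padding tokens effectively acts as weighted mean pooling.
    mask = batch["attention_mask"].unsqueeze(-1).float()
    emb = (outputs.last_hidden_state * mask).sum(dim=1) / mask.sum(dim=1)
    return F.normalize(emb, p=2, dim=1)

# The README mentions using task instructions for retrieval evaluation; omitted here for brevity.
query_emb = encode(["中国的首都是哪里?"])
doc_emb = encode(["beijing", "shanghai"])
print(query_emb @ doc_emb.T)  # similarity scores between the query and each passage
```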
@@ -402,7 +403,9 @@ import torch
 from sentence_transformers import SentenceTransformer
 
 model_name = "openbmb/MiniCPM-Embedding"
-model = SentenceTransformer(model_name, trust_remote_code=True, model_kwargs={"attn_implementation": "flash_attention_2", "torch_dtype": torch.float16})
+model = SentenceTransformer(model_name, trust_remote_code=True, model_kwargs={"torch_dtype": torch.float16})
+# You can also use the following line to enable the Flash Attention 2 implementation
+# model = SentenceTransformer(model_name, trust_remote_code=True, model_kwargs={"attn_implementation": "flash_attention_2", "torch_dtype": torch.float16})
 
 queries = ["中国的首都是哪里?"]
 passages = ["beijing", "shanghai"]
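Likewise, a minimal sketch of the sentence-transformers path after this commit, with Flash Attention 2 kept as the opt-in variant via `model_kwargs`. The `encode` calls and `normalize_embeddings` flag are standard sentence-transformers API; the similarity computation shown is illustrative rather than taken from the README.

```python
# Minimal sketch (see caveats above); Flash Attention 2 is now opt-in through model_kwargs.
import torch
from sentence_transformers import SentenceTransformer

model_name = "openbmb/MiniCPM-Embedding"
model = SentenceTransformer(model_name, trust_remote_code=True,
                            model_kwargs={"torch_dtype": torch.float16})
# Opt-in variant (assumes flash-attn is installed and a compatible GPU is available):
# model = SentenceTransformer(model_name, trust_remote_code=True,
#                             model_kwargs={"attn_implementation": "flash_attention_2",
#                                           "torch_dtype": torch.float16})

queries = ["中国的首都是哪里?"]
passages = ["beijing", "shanghai"]

query_emb = model.encode(queries, normalize_embeddings=True)
passage_emb = model.encode(passages, normalize_embeddings=True)
print(query_emb @ passage_emb.T)  # cosine similarities, since embeddings are L2-normalized
```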