Kevin Hu
committed on
Commit
·
2435d05
1
Parent(s):
5294086
Truncate text for zhipu embedding. (#4490)
Browse files
### What problem does this PR solve?
### Type of change
- [x] Bug Fix (non-breaking change which fixes an issue)
rag/llm/embedding_model.py
CHANGED
@@ -217,6 +217,14 @@ class ZhipuEmbed(Base):
|
|
217 |
def encode(self, texts: list):
|
218 |
arr = []
|
219 |
tks_num = 0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
220 |
for txt in texts:
|
221 |
res = self.client.embeddings.create(input=txt,
|
222 |
model=self.model_name)
|
|
|
217 |
def encode(self, texts: list):
|
218 |
arr = []
|
219 |
tks_num = 0
|
220 |
+
MAX_LEN = -1
|
221 |
+
if self.model_name.lower() == "embedding-2":
|
222 |
+
MAX_LEN = 512
|
223 |
+
if self.model_name.lower() == "embedding-3":
|
224 |
+
MAX_LEN = 3072
|
225 |
+
if MAX_LEN > 0:
|
226 |
+
texts = [truncate(t, MAX_LEN) for t in texts]
|
227 |
+
|
228 |
for txt in texts:
|
229 |
res = self.client.embeddings.create(input=txt,
|
230 |
model=self.model_name)
|