Kevin Hu committed on
Commit
2435d05
·
1 Parent(s): 5294086

Truncate text for zhipu embedding. (#4490)

Browse files

### What problem does this PR solve?


### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)

Files changed (1) hide show
  1. rag/llm/embedding_model.py +8 -0
rag/llm/embedding_model.py CHANGED
@@ -217,6 +217,14 @@ class ZhipuEmbed(Base):
217
  def encode(self, texts: list):
218
  arr = []
219
  tks_num = 0
 
 
 
 
 
 
 
 
220
  for txt in texts:
221
  res = self.client.embeddings.create(input=txt,
222
  model=self.model_name)
 
217
  def encode(self, texts: list):
218
  arr = []
219
  tks_num = 0
220
+ MAX_LEN = -1
221
+ if self.model_name.lower() == "embedding-2":
222
+ MAX_LEN = 512
223
+ if self.model_name.lower() == "embedding-3":
224
+ MAX_LEN = 3072
225
+ if MAX_LEN > 0:
226
+ texts = [truncate(t, MAX_LEN) for t in texts]
227
+
228
  for txt in texts:
229
  res = self.client.embeddings.create(input=txt,
230
  model=self.model_name)