fix the tokens error that occurred when adding the xinference model (#1527)
Browse files

### What problem does this PR solve?
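Fix the token-count error that occurred when adding the Xinference rerank model: the rerank response reports token usage under `meta.tokens`, but the code read it from a top-level `tokens` key.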
#1522
```shell
root@pc-gpu-86-41:~# curl -X 'POST' 'http://127.0.0.1:9997/v1/rerank' \
  -H 'accept: application/json' \
  -H 'Content-Type: application/json' \
  -d '{
"model": "bge-reranker-v2-m3",
"query": "A man is eating pasta.",
"return_documents":"true",
"return_len":"true",
"documents": [
"A man is eating food.",
"A man is eating a piece of bread.",
"The girl is carrying a baby.",
"A man is riding a horse.",
"A woman is playing violin."
]
}'
```
```json
{"id":"610a8724-3e96-11ef-81ce-08bfb886c012","results":[{"index":0,"relevance_score":0.999574601650238,"document":{"text":"A man is eating food."}},{"index":1,"relevance_score":0.07814773917198181,"document":{"text":"A man is eating a piece of bread."}},{"index":3,"relevance_score":0.000017700713215162978,"document":{"text":"A man is riding a horse."}},{"index":2,"relevance_score":0.0000163753629749408,"document":{"text":"The girl is carrying a baby."}},{"index":4,"relevance_score":0.00001631895975151565,"document":{"text":"A woman is playing violin."}}],"meta":{"api_version":null,"billed_units":null,"tokens":{"input_tokens":38,"output_tokens":38},"warnings":null}}
```
### Type of change
- [x] Bug Fix (non-breaking change which fixes an issue)
- [ ] New Feature (non-breaking change which adds functionality)
- [ ] Documentation Update
- [ ] Refactoring
- [ ] Performance Improvement
- [ ] Other (please describe):
- rag/llm/rerank_model.py +1 -2
@@ -155,5 +155,4 @@ class XInferenceRerank(Base):
|
|
155 |
"documents": texts
|
156 |
}
|
157 |
res = requests.post(self.base_url, headers=self.headers, json=data).json()
|
158 |
-
return np.array([d["relevance_score"] for d in res["results"]]), res["tokens"]["input_tokens"]
|
159 |
-
"output_tokens"]
|
|
|
155 |
"documents": texts
|
156 |
}
|
157 |
res = requests.post(self.base_url, headers=self.headers, json=data).json()
|
158 |
+
return np.array([d["relevance_score"] for d in res["results"]]), res["meta"]["tokens"]["input_tokens"]+res["meta"]["tokens"]["output_tokens"]
|
|