Resolves #2905: OpenAI-compatible model provider adds llama.cpp rerank support (#2906)
### What problem does this PR solve?
Resolves #2905.

Because token sizes are inconsistent across backends and there is no config parameter to control them, I truncate each document to a safe limit of 500 tokens in code.

My llama.cpp server runs with `-ub` set to 1024:

```
${llama_path}/bin/llama-server --host 0.0.0.0 --port 9901 -ub 1024 -ngl 99 -m $gguf_file --reranking "$@"
```
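For reference, a minimal sketch of the request/response shape the provider relies on, sent directly to the llama.cpp server started above (the host, port, query, and documents here are assumptions from my local setup, not part of the change itself):

```python
import requests

# Hypothetical probe of llama.cpp's /rerank endpoint; the payload fields
# mirror what the new provider builds in the diff below.
payload = {
    "model": "rerank",  # placeholder; llama.cpp serves the loaded GGUF model
    "query": "what is a panda?",
    "documents": ["hi", "it is a bear", "the giant panda is a bear species endemic to China"],
    "top_n": 3,
}
res = requests.post("http://localhost:9901/rerank", json=payload).json()
# The server answers with {"results": [{"index": i, "relevance_score": s}, ...]}
for d in res["results"]:
    print(d["index"], d["relevance_score"])
```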
### Type of change
- [x] New Feature (non-breaking change which adds functionality)
Here is my test of RAGFlow using llama.cpp:
```
slot update_slots: id 0 | task 458 | prompt done, n_past = 416, n_tokens = 416
slot release: id 0 | task 458 | stop processing: n_past = 416, truncated = 0
slot launch_slot_: id 0 | task 459 | processing task
slot update_slots: id 0 | task 459 | tokenizing prompt, len = 2
slot update_slots: id 0 | task 459 | prompt tokenized, n_ctx_slot = 8192, n_keep = 0, n_prompt_tokens = 111
slot update_slots: id 0 | task 459 | kv cache rm [0, end)
slot update_slots: id 0 | task 459 | prompt processing progress, n_past = 111, n_tokens = 111, progress = 1.000000
slot update_slots: id 0 | task 459 | prompt done, n_past = 111, n_tokens = 111
slot release: id 0 | task 459 | stop processing: n_past = 111, truncated = 0
srv update_slots: all slots are idle
request: POST /rerank 172.23.0.4 200
```
- rag/llm/rerank_model.py +38 -2
```diff
@@ -242,10 +242,46 @@ class LmStudioRerank(Base):
 class OpenAI_APIRerank(Base):
     def __init__(self, key, model_name, base_url):
+        if base_url.find("/rerank") == -1:
+            self.base_url = urljoin(base_url, "/rerank")
+        else:
+            self.base_url = base_url
+        self.headers = {
+            "Content-Type": "application/json",
+            "Authorization": f"Bearer {key}"
+        }
+        self.model_name = model_name
 
     def similarity(self, query: str, texts: list):
+        # No way to configure this in RAGFlow yet, so use a fixed 500-token limit
+        texts = [truncate(t, 500) for t in texts]
+        data = {
+            "model": self.model_name,
+            "query": query,
+            "documents": texts,
+            "top_n": len(texts),
+        }
+        token_count = 0
+        for t in texts:
+            token_count += num_tokens_from_string(t)
+        res = requests.post(self.base_url, headers=self.headers, json=data).json()
+        rank = np.zeros(len(texts), dtype=float)
+        if "results" not in res:
+            raise ValueError("response does not contain results\n" + str(res))
+        for d in res["results"]:
+            rank[d["index"]] = d["relevance_score"]
+
+        # Normalize the rank values to the range 0 to 1
+        min_rank = np.min(rank)
+        max_rank = np.max(rank)
+
+        # Avoid division by zero if all ranks are identical
+        if max_rank - min_rank != 0:
+            rank = (rank - min_rank) / (max_rank - min_rank)
+        else:
+            rank = np.zeros_like(rank)
+
+        return rank, token_count
 
 
 class CoHereRerank(Base):
```
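For anyone who wants to try this end to end, a hypothetical usage sketch (the import path follows the location of rerank_model.py in the repo; the key and model name are placeholders, since a llama.cpp server started without `--api-key` ignores the bearer token):

```python
# Hypothetical usage; assumes RAGFlow's package layout exposes
# OpenAI_APIRerank from rag.llm.rerank_model.
from rag.llm.rerank_model import OpenAI_APIRerank

mdl = OpenAI_APIRerank(
    key="no-key-needed",               # placeholder; unused without --api-key
    model_name="bge-reranker",         # placeholder model name
    base_url="http://localhost:9901",  # "/rerank" is appended automatically
)
scores, tokens = mdl.similarity("what is a panda?", ["it is a bear", "hello"])
print(scores, tokens)  # scores normalized to [0, 1], plus the input token count
```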