Added RAG and reasoning examples
README.md
@@ -311,6 +311,51 @@ print(llm.create_chat_completion(
 ))
 ```
 
+#### Simple llama-cpp-python RAG code, requires [PR#1440](https://github.com/abetlen/llama-cpp-python/pull/1440)
+
+```python
+from llama_cpp import Llama
+
+llm = Llama(model_path="./granite-3.2-8b-instruct.IQ4_XS.gguf", n_gpu_layers=41, n_ctx=131072)
+
+print(llm.create_chat_completion(
+    messages = [
+        {
+            "role": "user",
+            "content": "Write a short summary of each document please."
+        }
+    ],
+    documents = [
+        {
+            "text": "Lorem ipsum",
+        },
+        {
+            "text": "Dolor sit amet",
+        }
+    ]
+))
+```
+
+#### Simple llama-cpp-python reasoning code, requires [PR#1440](https://github.com/abetlen/llama-cpp-python/pull/1440)
+
+```python
+from llama_cpp import Llama
+
+llm = Llama(model_path="./granite-3.2-8b-instruct.IQ4_XS.gguf", n_gpu_layers=41, n_ctx=131072)
+
+print(llm.create_chat_completion(
+    messages = [
+        {
+            "role": "user",
+            "content": "You have 10 liters of a 30% acid solution. How many liters of a 70% acid solution must be added to achieve a 50% acid mixture?"
+        }
+    ],
+    template_kwargs = {
+        "thinking": True
+    }
+))
+```
+
 <!-- README_GGUF.md-how-to-run end -->
 
 <!-- original-model-card start -->
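
As a follow-up to the RAG example above: `create_chat_completion` also accepts `stream=True`, which turns the return value into an iterator of OpenAI-style chunks rather than a single completion dict, handy when the summaries run long. A minimal sketch, still assuming the PR#1440 branch so that the `documents` keyword is forwarded into the chat template (upstream llama-cpp-python does not accept it):

```python
from llama_cpp import Llama

llm = Llama(model_path="./granite-3.2-8b-instruct.IQ4_XS.gguf", n_gpu_layers=41, n_ctx=131072)

# With stream=True, create_chat_completion yields OpenAI-style chunks
# instead of returning one completion dict.
stream = llm.create_chat_completion(
    messages=[{"role": "user", "content": "Write a short summary of each document please."}],
    documents=[  # assumption: forwarded to the chat template by PR#1440
        {"text": "Lorem ipsum"},
        {"text": "Dolor sit amet"},
    ],
    stream=True,
)

for chunk in stream:
    # Each chunk carries an incremental "delta"; the first one holds the
    # role, later ones hold content fragments.
    delta = chunk["choices"][0]["delta"]
    if "content" in delta:
        print(delta["content"], end="", flush=True)
print()
```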
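
The mixture question in the reasoning example has a closed-form answer you can check the model against: the acid balance 0.30·10 + 0.70·x = 0.50·(10 + x) solves to x = 10 liters. A minimal sketch that captures the completion instead of printing the raw dict, with that check inline (again assuming the PR#1440 branch, where `template_kwargs` is passed through to the chat template; the response layout is llama-cpp-python's standard OpenAI-style dict):

```python
from llama_cpp import Llama

llm = Llama(model_path="./granite-3.2-8b-instruct.IQ4_XS.gguf", n_gpu_layers=41, n_ctx=131072)

response = llm.create_chat_completion(
    messages=[
        {
            "role": "user",
            "content": "You have 10 liters of a 30% acid solution. "
                       "How many liters of a 70% acid solution must be added "
                       "to achieve a 50% acid mixture?",
        }
    ],
    template_kwargs={"thinking": True},  # assumption: forwarded by PR#1440
)

# The model's reasoning and final answer arrive as ordinary message content.
print(response["choices"][0]["message"]["content"])

# Ground truth: 0.30*10 + 0.70*x = 0.50*(10 + x)  =>  0.20*x = 2  =>  x = 10
x = (0.50 - 0.30) * 10 / (0.70 - 0.50)
print(f"Expected answer: {x:g} liters")
```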