Update rag_agent.py
rag_agent.py +17 -9
@@ -5,7 +5,7 @@ from llama_index.llms.huggingface import HuggingFaceLLM
 from llama_index.embeddings.huggingface import HuggingFaceEmbedding
 from llama_index.core.tools import QueryEngineTool, ToolMetadata
 from transformers import AutoTokenizer, AutoModelForCausalLM
-
+from llama_index.llms.llama_cpp import LlamaCPP
 
 import os
 from huggingface_hub import login
@@ -22,15 +22,23 @@ login(token=hf_token)
 Settings.embed_model = HuggingFaceEmbedding(model_name="sentence-transformers/all-MiniLM-L6-v2")
 
 # ==== 2. Load Hugging Face LLM (Locally Installed or Remote Hosted) ====
-llm = HuggingFaceLLM(
-    model_name="unsloth/Mistral-Small-3.1-24B-Instruct-2503-GGUF", # You must have access!
-    tokenizer_name="unsloth/Mistral-Small-3.1-24B-Instruct-2503-GGUF",
-    context_window=2048,
+#llm = HuggingFaceLLM(
+#    model_name="unsloth/Mistral-Small-3.1-24B-Instruct-2503-GGUF", # You must have access!
+#    tokenizer_name="unsloth/Mistral-Small-3.1-24B-Instruct-2503-GGUF",
+#    context_window=2048,
+#    max_new_tokens=512,
+#    generate_kwargs={"temperature": 0.1},
+#    tokenizer_kwargs={"padding_side": "left"},
+#    device_map="auto" # Automatically assign model layers to available devices
+#
+#)
+llm = LlamaCPP(
+    model_path="/path/to/your/model.gguf",
+    temperature=0.1,
     max_new_tokens=512,
-    generate_kwargs={"temperature": 0.1},
-    tokenizer_kwargs={"padding_side": "left"},
-    device_map="auto" # Automatically assign model layers to available devices
-
+    context_window=4096,
+    generate_kwargs={"stop": ["</s>"]},
+    model_kwargs={"n_threads": 4}, # adjust for your CPU
 )
 
 Settings.llm = llm # Apply to global settings
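For context: the commented-out HuggingFaceLLM block pointed transformers at a GGUF-only repo, which it cannot load as an ordinary checkpoint, and that is the likely source of the build error; LlamaCPP runs GGUF weights through llama-cpp-python instead, so the Space's requirements must include llama-index-llms-llama-cpp (which pulls in llama-cpp-python). The diff leaves model_path as a placeholder. Below is a minimal sketch of resolving it at startup with hf_hub_download — the filename is an assumption, so check the repo's file list for the quantization actually used:

from huggingface_hub import hf_hub_download
from llama_index.llms.llama_cpp import LlamaCPP

# Hypothetical filename -- substitute the GGUF quantization you actually use.
model_path = hf_hub_download(
    repo_id="unsloth/Mistral-Small-3.1-24B-Instruct-2503-GGUF",
    filename="Mistral-Small-3.1-24B-Instruct-2503-Q4_K_M.gguf",
)

llm = LlamaCPP(
    model_path=model_path,  # local file downloaded above
    temperature=0.1,
    max_new_tokens=512,
    context_window=4096,
    generate_kwargs={"stop": ["</s>"]},
    model_kwargs={"n_threads": 4},  # adjust for your CPU
)

On CPU-only Space hardware, n_threads is the main performance knob; on a GPU Space, model_kwargs={"n_gpu_layers": -1} would offload all layers to the GPU instead.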