Spaces:

sanjeevbora
/

eCoreAI_Rag_llama

Runtime error

sanjeevbora committed on Oct 10, 2024

Commit

be3f145

verified ·

1 Parent(s): 8a531c3

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -33,21 +33,21 @@ books_db = Chroma(persist_directory="./pdf_db", embedding_function=embeddings)
 books_db_client = books_db.as_retriever()
 # Initialize the model and tokenizer
-model_name = "stabilityai/stablelm-zephyr-3b"
-bnb_config = transformers.BitsAndBytesConfig(
-    load_in_4bit=True,
-    bnb_4bit_quant_type='nf4',
-    bnb_4bit_use_double_quant=True,
-    bnb_4bit_compute_dtype=torch.bfloat16
-)
 model_config = transformers.AutoConfig.from_pretrained(model_name, max_new_tokens=1024)
 model = transformers.AutoModelForCausalLM.from_pretrained(
     model_name,
     trust_remote_code=True,
     config=model_config,
-    quantization_config=bnb_config,
     device_map=device,
 )

 books_db_client = books_db.as_retriever()
 # Initialize the model and tokenizer
+model_name = "nltpt/Llama-3.2-3B-Instruct"
+# bnb_config = transformers.BitsAndBytesConfig(
+#     load_in_4bit=True,
+#     bnb_4bit_quant_type='nf4',
+#     bnb_4bit_use_double_quant=True,
+#     bnb_4bit_compute_dtype=torch.bfloat16
+# )
 model_config = transformers.AutoConfig.from_pretrained(model_name, max_new_tokens=1024)
 model = transformers.AutoModelForCausalLM.from_pretrained(
     model_name,
     trust_remote_code=True,
     config=model_config,
+    # quantization_config=bnb_config,
     device_map=device,
 )