change method to Llama.from_pretrained to load the model directly from the HF repository
app.py CHANGED
@@ -74,12 +74,14 @@ def genRANstring(n):
 def create_chat():
     # Set HF API token and HF repo
     from llama_cpp import Llama
-    modelfile = hf_hub_download(
-        repo_id=os.environ.get("REPO_ID", "bartowski/gemma-2-2b-it-GGUF"),
-        filename=os.environ.get("MODEL_FILE", "gemma-2-2b-it-Q5_K_M.gguf"),
-    )
-    client = Llama(
-        model_path=modelfile,
+    #modelfile = hf_hub_download(
+    #    repo_id=os.environ.get("REPO_ID", "bartowski/gemma-2-2b-it-GGUF"),
+    #    filename=os.environ.get("MODEL_FILE", "gemma-2-2b-it-Q5_K_M.gguf"),
+    #)
+    client = Llama.from_pretrained(
+        repo_id="bartowski/gemma-2-2b-it-GGUF",
+        filename="gemma-2-2b-it-Q4_K_S.gguf",
+        #model_path=modelfile,
         #n_gpu_layers=-1, #enable GPU
         n_threads=2,
         temperature=0.24,
@@ -90,7 +92,7 @@ def create_chat():
         flash_attn=True,
         verbose=verbosity,
     )
-    print('loading gemma-2-2b-it-Q5_K_M.gguf with LlamaCPP...')
+    print('loading gemma-2-2b-it-Q4_K_S.gguf with LlamaCPP...')
     return client
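For context, here is a minimal standalone sketch of the new loading path. Llama.from_pretrained() downloads the GGUF file from the Hub via huggingface_hub and loads it in one step, replacing the explicit hf_hub_download() + Llama(model_path=...) pair. The n_ctx value and the chat-completion call are illustrative assumptions, not taken from the diff; note also that the commit switches quantization from Q5_K_M to Q4_K_S, trading a little quality for a smaller download and memory footprint.

from llama_cpp import Llama

# from_pretrained() fetches the GGUF weights from the Hugging Face Hub
# (caching them locally through huggingface_hub) and then loads them,
# so no separate hf_hub_download() call is needed.
llm = Llama.from_pretrained(
    repo_id="bartowski/gemma-2-2b-it-GGUF",
    filename="gemma-2-2b-it-Q4_K_S.gguf",
    n_ctx=4096,        # assumption: context length is not shown in the diff
    n_threads=2,
    flash_attn=True,
    verbose=False,
)

# Sampling options such as temperature belong to the completion call,
# not to the Llama constructor, so they are passed here instead.
response = llm.create_chat_completion(
    messages=[{"role": "user", "content": "Say hello in one sentence."}],
    temperature=0.24,
    max_tokens=64,
)
print(response["choices"][0]["message"]["content"])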