Commit e327a9e
Parent(s): e01e28e
Update default GPU layer, temperature values
utils.py CHANGED
@@ -19,12 +19,12 @@ from llama_cpp import Llama, LlamaGrammar, json_schema_to_gbnf
 URL = "http://localhost:5834/v1/chat/completions"
 in_memory_llm = None
 
-N_GPU_LAYERS = env.get("N_GPU_LAYERS",
+N_GPU_LAYERS = env.get("N_GPU_LAYERS", -1)  # Default to -1, which means use all layers if available
 CONTEXT_SIZE = int(env.get("CONTEXT_SIZE", 4096))
 LLM_MODEL_PATH = env.get("LLM_MODEL_PATH", None)
 USE_HTTP_SERVER = env.get("USE_HTTP_SERVER", "false").lower() == "true"
 MAX_TOKENS = int(env.get("MAX_TOKENS", 1000))
-TEMPERATURE = float(env.get("TEMPERATURE", 0.
+TEMPERATURE = float(env.get("TEMPERATURE", 0.3))
 
 if LLM_MODEL_PATH and len(LLM_MODEL_PATH) > 0:
     print(f"Using local model from {LLM_MODEL_PATH}")
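For reference, a minimal sketch of how these settings would typically be consumed with llama-cpp-python. The wiring below is an assumption for illustration: only the variable names and the new defaults come from the diff above, while the Llama constructor and create_chat_completion call use llama-cpp-python's public API.

# Illustrative sketch, not the module's actual code: shows where the changed
# N_GPU_LAYERS and TEMPERATURE defaults would take effect in a local model.
from os import environ as env
from llama_cpp import Llama

# Values mirror the new defaults in this commit; the int() cast on
# N_GPU_LAYERS is added here for safety and is not part of the diff.
N_GPU_LAYERS = int(env.get("N_GPU_LAYERS", -1))   # -1 offloads all layers to the GPU when available
CONTEXT_SIZE = int(env.get("CONTEXT_SIZE", 4096))
TEMPERATURE = float(env.get("TEMPERATURE", 0.3))  # new, lower default temperature
MAX_TOKENS = int(env.get("MAX_TOKENS", 1000))
LLM_MODEL_PATH = env.get("LLM_MODEL_PATH", None)

if LLM_MODEL_PATH:
    llm = Llama(
        model_path=LLM_MODEL_PATH,
        n_gpu_layers=N_GPU_LAYERS,
        n_ctx=CONTEXT_SIZE,
    )
    reply = llm.create_chat_completion(
        messages=[{"role": "user", "content": "Hello"}],
        temperature=TEMPERATURE,
        max_tokens=MAX_TOKENS,
    )
    print(reply["choices"][0]["message"]["content"])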