Spaces:
Paused
Paused
fix(runner.sh): --enforce-eager does not support values
Browse files
- run-llama.sh +0 -1
- run-sailor.sh +0 -1
- runner.sh +0 -1
run-llama.sh
CHANGED
@@ -27,5 +27,4 @@ python -u /app/openai_compatible_api_server.py \
|
|
27 |
--max-num-batched-tokens 32768 \
|
28 |
--max-model-len 32768 \
|
29 |
--dtype float16 \
|
30 |
-
--enforce-eager false \
|
31 |
--gpu-memory-utilization 0.85
|
|
|
27 |
--max-num-batched-tokens 32768 \
|
28 |
--max-model-len 32768 \
|
29 |
--dtype float16 \
|
|
|
30 |
--gpu-memory-utilization 0.85
|
run-sailor.sh
CHANGED
@@ -29,5 +29,4 @@ python -u /app/openai_compatible_api_server.py \
|
|
29 |
--max-num-batched-tokens 32768 \
|
30 |
--max-model-len 32768 \
|
31 |
--dtype float16 \
|
32 |
-
--enforce-eager false \
|
33 |
--gpu-memory-utilization 0.85
|
|
|
29 |
--max-num-batched-tokens 32768 \
|
30 |
--max-model-len 32768 \
|
31 |
--dtype float16 \
|
|
|
32 |
--gpu-memory-utilization 0.85
|
runner.sh
CHANGED
@@ -51,5 +51,4 @@ python -u /app/openai_compatible_api_server.py \
|
|
51 |
--max-num-batched-tokens 32768 \
|
52 |
--max-model-len 32768 \
|
53 |
--dtype float16 \
|
54 |
-
--enforce-eager false \
|
55 |
--gpu-memory-utilization 0.9
|
|
|
51 |
--max-num-batched-tokens 32768 \
|
52 |
--max-model-len 32768 \
|
53 |
--dtype float16 \
|
|
|
54 |
--gpu-memory-utilization 0.9
|