Spaces:

numind
/

NuMarkdown-8B-Thinking

Running on L40S

liamcripwell committed on Aug 8

Commit

7e4b5d9

verified ·

1 Parent(s): 841fa30

Update start.sh

Files changed (1) hide show

start.sh CHANGED Viewed

@@ -7,8 +7,8 @@ python3 -m vllm.entrypoints.openai.api_server \
     --model numind/NuMarkdown-8B-Thinking \
     --port 8000 \
     --host 0.0.0.0 \
-    --max-model-len 8048 \
-    --gpu-memory-utilization 0.9 \
     --disable-log-requests \
     --tensor-parallel-size 1 \
     --trust-remote-code > $HOME/app/vllm.log 2>&1 &

     --model numind/NuMarkdown-8B-Thinking \
     --port 8000 \
     --host 0.0.0.0 \
+    --max-model-len 20000 \
+    --gpu-memory-utilization 0.95 \
     --disable-log-requests \
     --tensor-parallel-size 1 \
     --trust-remote-code > $HOME/app/vllm.log 2>&1 &