Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
JulienDelavande
committed on
Commit
·
f6e13e9
1
Parent(s):
e122263
thinking toggle
Browse files
PROMPTS.md
CHANGED
|
@@ -70,3 +70,9 @@ System: {{preprompt}}\nUser:{{#each messages}}{{#ifUser}}{{content}}\nFalcon:{{/
|
|
| 70 |
```env
|
| 71 |
{{#each messages}}{{#ifUser}}<start_of_turn>user\n{{#if @first}}{{#if @root.preprompt}}{{@root.preprompt}}\n{{/if}}{{/if}}{{content}}<end_of_turn>\n<start_of_turn>model\n{{/ifUser}}{{#ifAssistant}}{{content}}<end_of_turn>\n{{/ifAssistant}}{{/each}}
|
| 72 |
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 70 |
```env
|
| 71 |
{{#each messages}}{{#ifUser}}<start_of_turn>user\n{{#if @first}}{{#if @root.preprompt}}{{@root.preprompt}}\n{{/if}}{{/if}}{{content}}<end_of_turn>\n<start_of_turn>model\n{{/ifUser}}{{#ifAssistant}}{{content}}<end_of_turn>\n{{/ifAssistant}}{{/each}}
|
| 72 |
```
|
| 73 |
+
|
| 74 |
+
## Qwen3
|
| 75 |
+
|
| 76 |
+
```env
|
| 77 |
+
{{#if @root.preprompt}}<|im_start|>system\n{{@root.preprompt}}<|im_end|>\n{{/if}}{{#each messages}}{{#ifUser}}<|im_start|>user\n{{content}}<|im_end|>\n<|im_start|>assistant\n{{/ifUser}}{{#ifAssistant}}{{content}}<|im_end|>\n{{/ifAssistant}}{{/each}}
|
| 78 |
+
```
|
src/lib/components/chat/ChatInput.svelte
CHANGED
|
@@ -6,6 +6,7 @@
|
|
| 6 |
import IconInternet from "$lib/components/icons/IconInternet.svelte";
|
| 7 |
import IconImageGen from "$lib/components/icons/IconImageGen.svelte";
|
| 8 |
import IconPaperclip from "$lib/components/icons/IconPaperclip.svelte";
|
|
|
|
| 9 |
import { useSettingsStore } from "$lib/stores/settings";
|
| 10 |
import { webSearchParameters } from "$lib/stores/webSearchParameters";
|
| 11 |
import {
|
|
@@ -37,6 +38,7 @@
|
|
| 37 |
modelIsMultimodal?: boolean;
|
| 38 |
children?: import("svelte").Snippet;
|
| 39 |
onPaste?: (e: ClipboardEvent) => void;
|
|
|
|
| 40 |
}
|
| 41 |
|
| 42 |
let {
|
|
@@ -51,6 +53,7 @@
|
|
| 51 |
modelIsMultimodal = false,
|
| 52 |
children,
|
| 53 |
onPaste,
|
|
|
|
| 54 |
}: Props = $props();
|
| 55 |
|
| 56 |
const onFileChange = async (e: Event) => {
|
|
@@ -68,7 +71,7 @@
|
|
| 68 |
let textareaElement: HTMLTextAreaElement | undefined = $state();
|
| 69 |
let isCompositionOn = $state(false);
|
| 70 |
|
| 71 |
-
const dispatch = createEventDispatcher<{ submit:
|
| 72 |
|
| 73 |
onMount(() => {
|
| 74 |
if (!isVirtualKeyboard()) {
|
|
@@ -121,8 +124,11 @@
|
|
| 121 |
!isVirtualKeyboard() &&
|
| 122 |
value.trim() !== ""
|
| 123 |
) {
|
| 124 |
-
event.preventDefault();
|
| 125 |
-
|
|
|
|
|
|
|
|
|
|
| 126 |
}
|
| 127 |
}
|
| 128 |
|
|
@@ -158,6 +164,9 @@
|
|
| 158 |
let showExtraTools = $derived(modelHasTools && !assistant);
|
| 159 |
|
| 160 |
let showNoTools = $derived(!showWebSearch && !showImageGen && !showFileUpload && !showExtraTools);
|
|
|
|
|
|
|
|
|
|
| 161 |
</script>
|
| 162 |
|
| 163 |
<div class="flex min-h-full flex-1 flex-col" onpaste={onPaste}>
|
|
@@ -227,6 +236,33 @@
|
|
| 227 |
</button>
|
| 228 |
</HoverTooltip>
|
| 229 |
{/if}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 230 |
{#if showImageGen}
|
| 231 |
<HoverTooltip
|
| 232 |
label="Generate images"
|
|
|
|
| 6 |
import IconInternet from "$lib/components/icons/IconInternet.svelte";
|
| 7 |
import IconImageGen from "$lib/components/icons/IconImageGen.svelte";
|
| 8 |
import IconPaperclip from "$lib/components/icons/IconPaperclip.svelte";
|
| 9 |
+
import IconThinking from "$lib/components/icons/IconThinking.svelte";
|
| 10 |
import { useSettingsStore } from "$lib/stores/settings";
|
| 11 |
import { webSearchParameters } from "$lib/stores/webSearchParameters";
|
| 12 |
import {
|
|
|
|
| 38 |
modelIsMultimodal?: boolean;
|
| 39 |
children?: import("svelte").Snippet;
|
| 40 |
onPaste?: (e: ClipboardEvent) => void;
|
| 41 |
+
showThinking?: boolean;
|
| 42 |
}
|
| 43 |
|
| 44 |
let {
|
|
|
|
| 53 |
modelIsMultimodal = false,
|
| 54 |
children,
|
| 55 |
onPaste,
|
| 56 |
+
showThinking = false,
|
| 57 |
}: Props = $props();
|
| 58 |
|
| 59 |
const onFileChange = async (e: Event) => {
|
|
|
|
| 71 |
let textareaElement: HTMLTextAreaElement | undefined = $state();
|
| 72 |
let isCompositionOn = $state(false);
|
| 73 |
|
| 74 |
+
const dispatch = createEventDispatcher<{ submit: { text: string } }>();
|
| 75 |
|
| 76 |
onMount(() => {
|
| 77 |
if (!isVirtualKeyboard()) {
|
|
|
|
| 124 |
!isVirtualKeyboard() &&
|
| 125 |
value.trim() !== ""
|
| 126 |
) {
|
| 127 |
+
event.preventDefault();
|
| 128 |
+
const textToSend = thinkingIsOn
|
| 129 |
+
? `${value.trim()} /think`
|
| 130 |
+
: `${value.trim()} /no_think`;
|
| 131 |
+
dispatch("submit", { text: textToSend });
|
| 132 |
}
|
| 133 |
}
|
| 134 |
|
|
|
|
| 164 |
let showExtraTools = $derived(modelHasTools && !assistant);
|
| 165 |
|
| 166 |
let showNoTools = $derived(!showWebSearch && !showImageGen && !showFileUpload && !showExtraTools);
|
| 167 |
+
|
| 168 |
+
let thinkingIsOn = $state(false);
|
| 169 |
+
|
| 170 |
</script>
|
| 171 |
|
| 172 |
<div class="flex min-h-full flex-1 flex-col" onpaste={onPaste}>
|
|
|
|
| 236 |
</button>
|
| 237 |
</HoverTooltip>
|
| 238 |
{/if}
|
| 239 |
+
{#if showThinking}
|
| 240 |
+
<HoverTooltip
|
| 241 |
+
label="Thinking"
|
| 242 |
+
position="top"
|
| 243 |
+
TooltipClassNames="text-xs !text-left !w-auto whitespace-nowrap !py-1 !mb-0 max-sm:hidden {thinkingIsOn
|
| 244 |
+
? 'hidden'
|
| 245 |
+
: ''}"
|
| 246 |
+
>
|
| 247 |
+
<button
|
| 248 |
+
class="base-tool"
|
| 249 |
+
class:active-tool={thinkingIsOn}
|
| 250 |
+
disabled={loading}
|
| 251 |
+
onclick={(e) => {
|
| 252 |
+
e.preventDefault();
|
| 253 |
+
thinkingIsOn = !thinkingIsOn;
|
| 254 |
+
}}
|
| 255 |
+
>
|
| 256 |
+
<IconThinking classNames="text-xl" />
|
| 257 |
+
{#if thinkingIsOn}
|
| 258 |
+
Thinking
|
| 259 |
+
{:else}
|
| 260 |
+
Not Thinking
|
| 261 |
+
{/if}
|
| 262 |
+
|
| 263 |
+
</button>
|
| 264 |
+
</HoverTooltip>
|
| 265 |
+
{/if}
|
| 266 |
{#if showImageGen}
|
| 267 |
<HoverTooltip
|
| 268 |
label="Generate images"
|
src/lib/components/chat/ChatMessage.svelte
CHANGED
|
@@ -313,7 +313,7 @@
|
|
| 313 |
<p
|
| 314 |
class="disabled w-full appearance-none whitespace-break-spaces text-wrap break-words bg-inherit px-5 py-3.5 text-gray-500 dark:text-gray-400"
|
| 315 |
>
|
| 316 |
-
{message.content.trim()}
|
| 317 |
</p>
|
| 318 |
{:else}
|
| 319 |
<form
|
|
@@ -329,7 +329,7 @@
|
|
| 329 |
class="w-full whitespace-break-spaces break-words rounded-xl bg-gray-100 px-5 py-3.5 text-gray-500 *:h-max dark:bg-gray-800 dark:text-gray-400"
|
| 330 |
rows="5"
|
| 331 |
bind:this={editContentEl}
|
| 332 |
-
value={message.content.trim()}
|
| 333 |
onkeydown={handleKeyDown}
|
| 334 |
required
|
| 335 |
></textarea>
|
|
|
|
| 313 |
<p
|
| 314 |
class="disabled w-full appearance-none whitespace-break-spaces text-wrap break-words bg-inherit px-5 py-3.5 text-gray-500 dark:text-gray-400"
|
| 315 |
>
|
| 316 |
+
{message.content.trim().replace(/\/(no_)?think$/, '')}
|
| 317 |
</p>
|
| 318 |
{:else}
|
| 319 |
<form
|
|
|
|
| 329 |
class="w-full whitespace-break-spaces break-words rounded-xl bg-gray-100 px-5 py-3.5 text-gray-500 *:h-max dark:bg-gray-800 dark:text-gray-400"
|
| 330 |
rows="5"
|
| 331 |
bind:this={editContentEl}
|
| 332 |
+
value={message.content.trim().replace(/\/(no_)?think$/, '')}
|
| 333 |
onkeydown={handleKeyDown}
|
| 334 |
required
|
| 335 |
></textarea>
|
src/lib/components/chat/ChatWindow.svelte
CHANGED
|
@@ -73,6 +73,9 @@
|
|
| 73 |
let isSharedRecently = $state(false);
|
| 74 |
let editMsdgId: Message["id"] | null = $state(null);
|
| 75 |
let pastedLongContent = $state(false);
|
|
|
|
|
|
|
|
|
|
| 76 |
|
| 77 |
beforeNavigate(() => {
|
| 78 |
if (page.params.id) {
|
|
@@ -88,11 +91,19 @@
|
|
| 88 |
continue: { id: Message["id"] };
|
| 89 |
}>();
|
| 90 |
|
| 91 |
-
const handleSubmit = () => {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 92 |
if (loading) return;
|
| 93 |
-
|
|
|
|
|
|
|
| 94 |
message = "";
|
| 95 |
-
}
|
| 96 |
|
| 97 |
let lastTarget: EventTarget | null = null;
|
| 98 |
|
|
@@ -438,6 +449,7 @@
|
|
| 438 |
disabled={isReadOnly || lastIsError}
|
| 439 |
modelHasTools={currentModel.tools}
|
| 440 |
modelIsMultimodal={currentModel.multimodal}
|
|
|
|
| 441 |
/>
|
| 442 |
{/if}
|
| 443 |
|
|
|
|
| 73 |
let isSharedRecently = $state(false);
|
| 74 |
let editMsdgId: Message["id"] | null = $state(null);
|
| 75 |
let pastedLongContent = $state(false);
|
| 76 |
+
let showThinking = $derived(
|
| 77 |
+
currentModel.name === "Qwen/Qwen3_8B"
|
| 78 |
+
);
|
| 79 |
|
| 80 |
beforeNavigate(() => {
|
| 81 |
if (page.params.id) {
|
|
|
|
| 91 |
continue: { id: Message["id"] };
|
| 92 |
}>();
|
| 93 |
|
| 94 |
+
// const handleSubmit = () => {
|
| 95 |
+
// if (loading) return;
|
| 96 |
+
// dispatch("message", message);
|
| 97 |
+
// message = "";
|
| 98 |
+
// };
|
| 99 |
+
|
| 100 |
+
function handleSubmit(ev?: CustomEvent<{ text: string }>) {
|
| 101 |
if (loading) return;
|
| 102 |
+
|
| 103 |
+
const content = ev?.detail?.text ?? message;
|
| 104 |
+
dispatch("message", content);
|
| 105 |
message = "";
|
| 106 |
+
}
|
| 107 |
|
| 108 |
let lastTarget: EventTarget | null = null;
|
| 109 |
|
|
|
|
| 449 |
disabled={isReadOnly || lastIsError}
|
| 450 |
modelHasTools={currentModel.tools}
|
| 451 |
modelIsMultimodal={currentModel.multimodal}
|
| 452 |
+
showThinking={showThinking}
|
| 453 |
/>
|
| 454 |
{/if}
|
| 455 |
|
src/lib/components/icons/IconThinking.svelte
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<script lang="ts">
|
| 2 |
+
interface Props {
|
| 3 |
+
classNames?: string;
|
| 4 |
+
}
|
| 5 |
+
let { classNames = "" }: Props = $props();
|
| 6 |
+
</script>
|
| 7 |
+
|
| 8 |
+
<svg
|
| 9 |
+
class={classNames}
|
| 10 |
+
xmlns="http://www.w3.org/2000/svg"
|
| 11 |
+
width="20"
|
| 12 |
+
height="20"
|
| 13 |
+
viewBox="0 0 24 24"
|
| 14 |
+
fill="none"
|
| 15 |
+
stroke="currentColor"
|
| 16 |
+
stroke-width="1.5"
|
| 17 |
+
stroke-linecap="round"
|
| 18 |
+
stroke-linejoin="round"
|
| 19 |
+
>
|
| 20 |
+
|
| 21 |
+
<path
|
| 22 |
+
stroke="none"
|
| 23 |
+
d="M0 0h24v24H0z"
|
| 24 |
+
fill="none"
|
| 25 |
+
/>
|
| 26 |
+
<path
|
| 27 |
+
d="M15.5 13a3.5 3.5 0 0 0 -3.5 3.5v1a3.5 3.5 0 0 0 7 0v-1.8" /><path d="M8.5 13a3.5 3.5 0 0 1 3.5 3.5v1a3.5 3.5 0 0 1 -7 0v-1.8" /><path d="M17.5 16a3.5 3.5 0 0 0 0 -7h-.5" /><path d="M19 9.3v-2.8a3.5 3.5 0 0 0 -7 0" /><path d="M6.5 16a3.5 3.5 0 0 1 0 -7h.5" /><path d="M5 9.3v-2.8a3.5 3.5 0 0 1 7 0v10" /></svg>
|
tgi_deploy.sh
ADDED
|
@@ -0,0 +1,77 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/bin/bash
|
| 2 |
+
cd text-generation-inference
|
| 3 |
+
conda create -n tgi python=3.11
|
| 4 |
+
eval "$(/home/user/miniconda3/bin/conda shell.bash hook)"
|
| 5 |
+
conda install -c conda-forge pkg-config openssl
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
conda activate tgi
|
| 9 |
+
export OPENSSL_DIR=$CONDA_PREFIX && \
|
| 10 |
+
export OPENSSL_INCLUDE_DIR=$CONDA_PREFIX/include && \
|
| 11 |
+
export OPENSSL_LIB_DIR=$CONDA_PREFIX/lib && \
|
| 12 |
+
export PKG_CONFIG_PATH=$CONDA_PREFIX/lib/pkgconfig
|
| 13 |
+
export PYTHONPATH=/home/user/miniconda3/envs/tgi/lib/python3.11/site-packages
|
| 14 |
+
export LD_LIBRARY_PATH=/home/user/:$LD_LIBRARY_PATH
|
| 15 |
+
ln -s /usr/lib/x86_64-linux-gnu/libnvidia-ml.so.1 /home/user/libnvidia-ml.so
|
| 16 |
+
|
| 17 |
+
nohup text-generation-launcher --model-id HuggingFaceH4/zephyr-7b-beta -p 7860 &> qwen2.log &
|
| 18 |
+
|
| 19 |
+
PYTHONPATH=/home/user/:$PYTHONPATH \
|
| 20 |
+
LD_LIBRARY_PATH=/home/user/:$LD_LIBRARY_PATH \
|
| 21 |
+
text-generation-launcher \
|
| 22 |
+
--model-id HuggingFaceH4/zephyr-7b-beta \
|
| 23 |
+
--disable-custom-kernels \
|
| 24 |
+
-p 7860
|
| 25 |
+
|
| 26 |
+
LD_LIBRARY_PATH=/home/user/:$LD_LIBRARY_PATH \
|
| 27 |
+
text-generation-launcher \
|
| 28 |
+
--model-id HuggingFaceH4/zephyr-7b-beta \
|
| 29 |
+
-p 7860
|
| 30 |
+
|
| 31 |
+
LD_LIBRARY_PATH=/home/user/:$LD_LIBRARY_PATH text-generation-launcher --model-id Qwen/Qwen2.5-VL-7B-Instruct -p 7860
|
| 32 |
+
LD_LIBRARY_PATH=/home/user/:$LD_LIBRARY_PATH text-generation-launcher --model-id Qwen/Qwen3-8B -p 7860
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
text-generation-launcher \
|
| 36 |
+
--model-id HuggingFaceH4/zephyr-7b-beta \
|
| 37 |
+
--disable-custom-kernels
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
# To run the server in the background, use:
|
| 41 |
+
nohup text-generation-launcher \
|
| 42 |
+
--model-id mistralai/Mistral-7B-v0.1 \
|
| 43 |
+
--port 8080 \
|
| 44 |
+
--max-batch-prefill-tokens 2048 \
|
| 45 |
+
--max-batch-total-tokens 4096 \
|
| 46 |
+
--max-input-length 4096 \
|
| 47 |
+
--max-total-tokens 8192 \
|
| 48 |
+
--max-batch-size 32 \
|
| 49 |
+
--max-waiting-tokens 20 \
|
| 50 |
+
--hostname 0.0.0.0 \
|
| 51 |
+
--cuda-memory-fraction 0.95 \
|
| 52 |
+
--max-concurrent-requests 128 \
|
| 53 |
+
--trust-remote-code \
|
| 54 |
+
--json-output > tgi.log 2>&1 &
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
# To stop the server, use:
|
| 58 |
+
ps aux | grep text-generation-launcher
|
| 59 |
+
pkill -f text-generation-launcher
|
| 60 |
+
kill -9 $(nvidia-smi | grep python | awk '{ print $5 }')
|
| 61 |
+
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
curl https://jdelavande-dev-tgi.hf.space/generate \
|
| 65 |
+
-X POST \
|
| 66 |
+
-H "Content-Type: application/json" \
|
| 67 |
+
-d '{"inputs":"Bonjour !", "parameters":{"max_new_tokens":20}}'
|
| 68 |
+
|
| 69 |
+
curl https://jdelavande-dev-tgi2.hf.space/ \
|
| 70 |
+
-X POST \
|
| 71 |
+
-H "Content-Type: application/json" \
|
| 72 |
+
-d '{"inputs":"Bonjour !", "parameters":{"max_new_tokens":20}}'
|
| 73 |
+
|
| 74 |
+
curl localhost:7860/generate \
|
| 75 |
+
-X POST \
|
| 76 |
+
-H "Content-Type: application/json" \
|
| 77 |
+
-d '{"inputs":"Bonjour !", "parameters":{"max_new_tokens":20}}'
|