File size: 6,759 Bytes
b14d567
 
 
 
7a08a75
b14d567
0378a63
878e432
b14d567
 
 
 
 
 
 
fb60bd2
b14d567
878e432
b14d567
7a08a75
 
62957ae
 
 
f6f47e4
7a08a75
 
 
 
 
 
 
b14d567
 
 
 
 
 
 
 
 
7a08a75
0378a63
7a08a75
b14d567
878e432
 
 
 
 
 
 
 
b14d567
 
 
 
 
 
 
 
 
 
 
 
 
 
 
878e432
 
 
 
 
 
 
 
 
 
 
b14d567
 
 
 
878e432
fb60bd2
878e432
 
 
0378a63
b14d567
 
fb60bd2
878e432
f6f47e4
 
878e432
 
 
fb60bd2
 
 
 
 
 
 
b14d567
 
 
 
 
878e432
 
 
 
 
 
 
b14d567
 
 
 
 
 
734c928
 
 
 
 
 
 
 
 
7a08a75
b14d567
7a08a75
 
b14d567
7a08a75
08f0bdc
b14d567
734c928
b14d567
 
 
 
 
 
 
 
 
fb60bd2
 
 
b14d567
 
7a08a75
9126f16
7a08a75
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
878e432
b14d567
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
<script lang="ts">
    import Textarea from "@/lib/components/ui/textarea/textarea.svelte";
    import Badge from "@/lib/components/ui/badge/badge.svelte";
    import * as webllm from "@mlc-ai/web-llm";
    import { onMount, tick } from 'svelte';

    // Model id passed to CreateMLCEngine; must match a model_id declared in
    // the appConfig model_list inside loadWebLLM.
    // NOTE(review): the id says q0f32 but the hosted weights/wasm referenced in
    // loadWebLLM are q0f16 — confirm the mismatch is intentional.
    let selectedModel = "smollm-360M-instruct-add-basics-q0f32-MLC";

    // WebLLM engine handle; undefined until loadWebLLM completes successfully.
    let engine: webllm.MLCEngineInterface;
    // True while the model is downloading/compiling (drives the status line).
    let isLoading = false;
    // Progress text streamed from the engine's init callback.
    let loadingStatus = '';
    // Bound to the prompt textarea.
    let inputText = '';
    // Last completion text rendered in the <pre> output.
    let outputText = '';
    // User-visible error message; empty string means "no error".
    let error = '';
    // Wall-clock time of the last completion, in milliseconds.
    let completionSpeed: number | null = null;
    // Throughput of the last completion.
    let tokensPerSecond: number | null = null;
    // True while a completion request is in flight (serializes requests).
    let isGenerating = false;
    // Newest prompt received while a request was in flight; replayed after
    // the current request finishes (latest-wins queue of depth 1).
    let pendingRequest: string | null = null;

    // Clickable example prompts shown when the textarea is empty.
    const promptExamples = [
        "Tell me a story about a cat.",
        "What is refraction?",
        "Explain thermal conductivity",
        "What is Newton's first law of motion?",
        "How do I make everything uppercase in Python?",
    ]

    /**
     * Fill the textarea with an example prompt and immediately run a
     * completion for it.
     *
     * @param prompt - Example prompt text chosen by the user.
     */
    async function setPrompt(prompt: string) {
        inputText = prompt;
        // Await the completion: the original fired it without awaiting, so the
        // returned promise (and any rejection) was silently dropped.
        await generateCompletion(prompt);
    }

    /**
     * Load the selected model into a WebLLM engine. Download/compile progress
     * is surfaced through `loadingStatus`; failures land in `error`.
     */
    async function loadWebLLM() {
        isLoading = true;
        error = '';

        // Forward streamed progress text from the engine loader to the UI.
        const onInitProgress = (report: webllm.InitProgressReport) => {
            loadingStatus = report.text;
        };

        // Locally-declared model catalogue; `selectedModel` picks the entry.
        // NOTE(review): the first entry's model_id says q0f32 while its weights
        // and wasm are q0f16 — confirm this mismatch is intentional.
        const appConfig: webllm.AppConfig = {
            model_list: [
                {
                    model: `https://huggingface.co/reach-vb/smollm-360M-instruct-add-basics-q0f16-MLC`,
                    model_id: 'smollm-360M-instruct-add-basics-q0f32-MLC',
                    model_lib: `${webllm.modelLibURLPrefix}${webllm.modelVersion}/SmolLM-360M-Instruct-q0f16-ctx2k_cs1k-webgpu.wasm`,
                    overrides: { context_window_size: 2048 },
                },
                {
                    model: `https://huggingface.co/mlc-ai/Qwen2-0.5B-Instruct-q4f16_1-MLC`,
                    model_id: 'Qwen2-0.5B-Instruct-q4f16_1-MLC',
                    model_lib: `${webllm.modelLibURLPrefix}${webllm.modelVersion}/Qwen2-0.5B-Instruct-q4f16_1-ctx4k_cs1k-webgpu.wasm`,
                    overrides: { context_window_size: 2048 },
                },
            ],
        };

        try {
            engine = await webllm.CreateMLCEngine(selectedModel, {
                appConfig,
                initProgressCallback: onInitProgress,
                logLevel: "INFO",
            });
        } catch (err) {
            error = `Failed to load the model: ${(err as Error).message}`;
        } finally {
            isLoading = false;
        }
    }

    /**
     * Run a chat completion for `content`. Only one request runs at a time;
     * a prompt arriving while the engine is busy (or still loading) is queued
     * latest-wins and replayed from the finally block.
     *
     * @param content - Raw prompt text from the textarea or an example chip.
     */
    async function generateCompletion(content: string) {
        if (!engine || isGenerating) {
            /**
             * This is used to store the most recent request from user
             * while the current request is being processed.
             */
            pendingRequest = content.trim();
            return;
        }

        if (!content.trim()) return;

        isGenerating = true;
        const startTime = performance.now();
        try {
            console.log("Generating completion:", content);
            const response = await engine.chat.completions.create({
                messages: [
                    {role: "user", content: content}
                ],
                max_tokens: 15,
            });

            // `??` is the right operator for "missing value": a legitimately
            // empty completion stays empty rather than being treated as falsy.
            outputText = response.choices[0].message.content ?? "";

            // Indicate the response was cut short by the token limit if it
            // doesn't already end with a period. Fixes two defects in the
            // original: indexing `outputText[length - 1]` compared `undefined`
            // on an empty string, and a completely empty completion became a
            // bare (misleading) "...".
            if (
                response.choices[0].finish_reason === "length" &&
                outputText !== "" &&
                !outputText.endsWith(".")
            ) {
                outputText += "...";
            }

            const endTime = performance.now();
            const elapsedTimeInSeconds = (endTime - startTime) / 1000;
            completionSpeed = Math.round(endTime - startTime);

            const generatedTokens = response.usage?.completion_tokens ?? 0;
            // Guard against a zero-duration clock reading producing Infinity.
            tokensPerSecond = elapsedTimeInSeconds > 0
                ? Math.round(generatedTokens / elapsedTimeInSeconds)
                : 0;

            error = '';
        } catch (err) {
            error = `Error: ${(err as Error).message}`;
        } finally {
            isGenerating = false;

            // Replay the newest queued prompt, unless it is identical to the
            // one that just completed.
            if (pendingRequest && pendingRequest !== content) {
                const nextRequest = pendingRequest;
                pendingRequest = null;
                await generateCompletion(nextRequest);
            }
        }
    }

    onMount(loadWebLLM);
</script>


<!-- Playground layout: logo + header, prompt textarea, status/metric badges,
     example-prompt chips (shown only when the textarea is empty), and the
     completion output. -->
<div class="flex my-12 flex-col items-center gap-6 max-w-xl mx-auto relative">
    <img 
      src="logo_smollm.png" 
      alt="logo" 
    class="absolute top-0 right-0 w-28 h-28 object-contain -mt-8 -mr-8 lg:-mr-16"
    />
    <h1 class="text-center font-sans font-bold text-5xl text-gray-800 mb-2">SmolLM Playground</h1>
    <p class="text-center font-sans text-sm text-gray-600 mb-4">Powered by <a href="https://huggingface.co/mlc-ai" target="_blank" class="underline text-gray-800">MLC</a> WebLLM <a class="underline text-gray-800" href="https://huggingface.co/HuggingFaceTB/smollm-360M-instruct-add-basics" target="_blank">SmolLM-360M-Instruct-Add-Basics</a> <span class="text-xs italic">(15 Max Tokens)</span></p>

    <!-- A completion is kicked off on every input event; generateCompletion
         serializes concurrent requests via its internal pending queue. -->
    <Textarea 
        bind:value={inputText}
        on:input={() => generateCompletion(inputText)}
        disabled={isLoading}
        class="w-full text-lg" 
        placeholder="Say something..."
    />
    <p class="text-center font-sans text-xs text-gray-600 mb-4 italic">This is a smol model, go easy on it. Check out <a href="https://huggingface.co/spaces/HuggingFaceTB/SmolLM-360M-Instruct-WebGPU" target="_blank" class="underline text-gray-800">this demo</a> for full conversations.</p>
    <!-- One of three mutually exclusive states: loading status, error text,
         or the latency/throughput badges from the last completion. -->
    {#if isLoading}
        <p class="text-sm text-slate-600 text-center">{loadingStatus}</p>
    {:else if error}
        <p class="text-sm text-red-600">{error}</p>
    {:else}
        <div class="flex gap-2">
            {#if completionSpeed !== null}
                <Badge>{completionSpeed}ms</Badge>
            {/if}
            {#if tokensPerSecond !== null}
                <Badge>{tokensPerSecond} tok/s</Badge>
            {/if}
        </div>
    {/if}
    <!-- Example prompts are offered only while the textarea is empty and the
         model has finished loading. -->
    <div class="flex flex-col items-center mb-4">
        {#if inputText === '' && !isLoading}
        <p class="text-sm mb-2">Try these examples:</p>
        <div class="flex flex-wrap justify-center gap-2">
            {#each promptExamples as prompt}
                <button on:click={() => setPrompt(prompt)}>
                    <Badge
                        variant="outline"
                        class="cursor-pointer bg-orange-100 hover:bg-orange-200"
                    >
                        {prompt}
                    </Badge>
                </button>
            {/each}
        </div>
        {/if}
    </div>
    <!-- Completion output; whitespace-pre-wrap preserves model formatting. -->
    <pre class="text-xl font-bold whitespace-pre-wrap">{outputText}</pre>

</div>