File size: 6,201 Bytes
b14d567
 
 
 
7a08a75
b14d567
0378a63
878e432
b14d567
 
 
 
 
 
 
fb60bd2
b14d567
878e432
b14d567
7a08a75
 
 
 
 
 
 
 
 
 
 
b14d567
 
 
 
 
 
 
 
 
7a08a75
0378a63
7a08a75
b14d567
878e432
 
 
 
 
 
 
 
b14d567
 
 
 
 
 
 
 
 
 
 
 
 
 
 
878e432
 
 
 
 
 
 
 
 
 
 
b14d567
 
 
 
878e432
fb60bd2
878e432
 
 
0378a63
b14d567
 
fb60bd2
878e432
 
 
 
 
 
fb60bd2
 
 
 
 
 
 
b14d567
 
 
 
 
878e432
 
 
 
 
 
 
b14d567
 
 
 
 
 
7a08a75
08f0bdc
7a08a75
 
b14d567
7a08a75
 
b14d567
7a08a75
08f0bdc
b14d567
7a08a75
b14d567
 
 
 
 
 
 
 
 
fb60bd2
 
 
b14d567
 
7a08a75
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
878e432
b14d567
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
<script lang="ts">
    import Textarea from "@/lib/components/ui/textarea/textarea.svelte";
    import Badge from "@/lib/components/ui/badge/badge.svelte";
    import * as webllm from "@mlc-ai/web-llm";
    import { onMount, tick } from 'svelte';

    // Must match a `model_id` entry in appConfig.model_list (loadWebLLM).
    // NOTE(review): the id says q0f32 but the weight/wasm URLs below point at
    // q0f16 artifacts — confirm the mismatch is intentional.
    let selectedModel = "smollm-360M-instruct-add-basics-q0f32-MLC";

    let engine: webllm.MLCEngineInterface;      // set once loadWebLLM succeeds
    let isLoading = false;                      // true while the model downloads/compiles
    let loadingStatus = '';                     // progress text from WebLLM's init callback
    let inputText = '';                         // bound to the Textarea
    let outputText = '';                        // last completion, shown in the <pre>
    let error = '';                             // user-visible load/generation error
    let completionSpeed: number | null = null;  // wall-clock ms of the last request
    let tokensPerSecond: number | null = null;  // completion_tokens / elapsed seconds
    let isGenerating = false;                   // guards against concurrent requests
    let pendingRequest: string | null = null;   // latest input queued while busy

    // Example prompts rendered as clickable badges when the input is empty.
    const promptExamples = [
        "What is the capital of France?",
        "Tell me a story about a cat.",
        "Write a poem about the ocean.",
    ]

    /**
     * Fill the input box with an example prompt and immediately request a
     * completion for it.
     *
     * @param prompt - example text chosen by the user.
     */
    async function setPrompt(prompt: string) {
        inputText = prompt;
        // Await the generation: the original fired it as a floating promise,
        // which turns any throw inside generateCompletion into an unhandled
        // rejection instead of flowing through its own error handling.
        await generateCompletion(prompt);
    }

    /**
     * Download and initialize the selected WebLLM model in the browser.
     *
     * Side effects: toggles `isLoading`, streams progress text into
     * `loadingStatus`, assigns `engine` on success, and sets `error` on
     * failure (e.g. no WebGPU support, network failure).
     */
    async function loadWebLLM() {
        isLoading = true;
        error = '';
        // Progress reports arrive repeatedly during download/compile.
        const initProgressCallback = (report: webllm.InitProgressReport) => {
            loadingStatus = report.text;
        };

        const appConfig: webllm.AppConfig = {
            model_list: [{
                // NOTE(review): the weight/wasm artifacts are q0f16 while the
                // model_id advertises q0f32 — confirm this is intentional.
                model: `https://huggingface.co/reach-vb/smollm-360M-instruct-add-basics-q0f16-MLC`,
                model_id: 'smollm-360M-instruct-add-basics-q0f32-MLC',
                model_lib: `${webllm.modelLibURLPrefix}${webllm.modelVersion}/SmolLM-360M-Instruct-q0f16-ctx2k_cs1k-webgpu.wasm`,
                overrides: { context_window_size: 2048 },
            },
            {
                model: `https://huggingface.co/mlc-ai/Qwen2-0.5B-Instruct-q4f16_1-MLC`,
                model_id: 'Qwen2-0.5B-Instruct-q4f16_1-MLC',
                model_lib: `${webllm.modelLibURLPrefix}${webllm.modelVersion}/Qwen2-0.5B-Instruct-q4f16_1-ctx4k_cs1k-webgpu.wasm`,
                overrides: { context_window_size: 2048 },
            }
        ],
        };

        try {
            engine = await webllm.CreateMLCEngine(selectedModel, {
                appConfig,
                initProgressCallback,
                logLevel: "INFO",
            });
        } catch (err) {
            // `err` is `unknown` under strict mode; narrow instead of
            // asserting `as Error` (a non-Error throw would yield undefined).
            const message = err instanceof Error ? err.message : String(err);
            error = `Failed to load the model: ${message}`;
        } finally {
            isLoading = false;
        }
    }

    /**
     * Run a chat completion for `content` and publish the result.
     *
     * Concurrency: only one request runs at a time. If called while the
     * engine is missing or busy, the (trimmed) text is parked in
     * `pendingRequest`; the active run replays the newest pending text from
     * its `finally` block, so rapid typing settles on the last input.
     *
     * Side effects: sets `outputText`, `completionSpeed`, `tokensPerSecond`,
     * `error`, and `isGenerating`.
     *
     * @param content - raw user text; whitespace-only input is ignored.
     */
    async function generateCompletion(content: string) {
        // Ignore empty input up front — the original only checked this after
        // the busy guard, so whitespace-only text could be queued as a
        // pending request (a harmless but pointless no-op).
        if (!content.trim()) return;

        if (!engine || isGenerating) {
            // Store the most recent request from the user while the current
            // request is being processed (or the model is still loading).
            pendingRequest = content.trim();
            return;
        }

        isGenerating = true;
        const startTime = performance.now();
        try {
            console.log("Generating completion:", content);
            const response = await engine.chat.completions.create({
                messages: [
                    {role: "user", content: content}
                ],
                max_tokens: 15,
            });

            outputText = response.choices[0].message.content || "";

            // Indicate that the response was cut short by the token cap.
            if (response.choices[0].finish_reason === "length") {
                outputText += "...";
            }

            const endTime = performance.now();
            const elapsedTimeInSeconds = (endTime - startTime) / 1000;
            completionSpeed = Math.round(endTime - startTime);

            const generatedTokens = response.usage?.completion_tokens || 0;
            tokensPerSecond = Math.round(generatedTokens / elapsedTimeInSeconds);

            error = '';
        } catch (err) {
            // Narrow the `unknown` catch variable instead of asserting.
            const message = err instanceof Error ? err.message : String(err);
            error = `Error: ${message}`;
        } finally {
            isGenerating = false;

            // Replay the newest input queued while this request was running.
            if (pendingRequest && pendingRequest !== content) {
                const nextRequest = pendingRequest;
                pendingRequest = null;
                await generateCompletion(nextRequest);
            }
        }
    }

    // Start downloading/compiling the model as soon as the component mounts.
    onMount(loadWebLLM);
</script>

<!-- Main demo layout: prompt input, status/metrics, example prompts, output. -->
<div class="flex my-20 flex-col items-center gap-4 max-w-xl mx-auto">
    <h1 class="text-center font-mono font-bold text-4xl">SmolLM 🤗</h1>
    <p class="text-center font-mono text-sm mb-4">Powered by <a href="https://huggingface.co/mlc-ai" target="_blank" class="underline text-blue-500">MLC</a> WebLLM <a class="underline text-blue-500" href="https://huggingface.co/HuggingFaceTB/smollm-360M-instruct-add-basics" target="_blank">SmolLM-360M-Instruct-Add-Basics</a> <span class="text-xs italic">(15 Max Tokens)</span></p>    

    <!-- Fires a completion on every keystroke; generateCompletion's
         pendingRequest queue serializes the resulting burst of calls. -->
    <Textarea 
        bind:value={inputText}
        on:input={() => generateCompletion(inputText)}
        disabled={isLoading}
        class="w-full text-lg" 
        placeholder="Say something..."
    />
    <p class="text-center text-xs italic">This model doesn't work well with extremely creative prompts.</p>
    <!-- One of: loading status, error message, or last-run metrics. -->
    {#if isLoading}
        <p class="text-sm text-slate-600 text-center">{loadingStatus}</p>
    {:else if error}
        <p class="text-sm text-red-600">{error}</p>
    {:else}
        <div class="flex gap-2">
            {#if completionSpeed !== null}
                <Badge>{completionSpeed}ms</Badge>
            {/if}
            {#if tokensPerSecond !== null}
                <Badge>{tokensPerSecond} tok/s</Badge>
            {/if}
        </div>
    {/if}
    <!-- Example prompts, shown only while the input box is empty. -->
    <div class="flex flex-col items-center mb-4">
        {#if inputText === ''}
        <p class="text-sm mb-2">Try these examples:</p>
        <div class="flex flex-wrap justify-center gap-2">
            {#each promptExamples as prompt}
                <button on:click={() => setPrompt(prompt)}>
                    <Badge
                        variant="outline"
                        class="cursor-pointer bg-orange-100 hover:bg-orange-200"
                    >
                        {prompt}
                    </Badge>
                </button>
            {/each}
        </div>
        {/if}
    </div>
    <!-- Completion output; whitespace-pre-wrap preserves model line breaks. -->
    <pre class="text-xl font-bold whitespace-pre-wrap">{outputText}</pre>

</div>