<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>ONNX WebGPU Download & Streaming Text Generation</title>
<script type="module">
import { pipeline, TextStreamer } from 'https://cdn.jsdelivr.net/npm/@huggingface/transformers@3/dist/transformers.min.js';

// Helper: download a file with progress
async function downloadWithProgress(url, onProgress) {
    const response = await fetch(url);
    if (!response.ok) throw new Error("Network response was not ok");

    const contentLength = +response.headers.get("Content-Length");
    if (!contentLength) throw new Error("Content-Length header missing");
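    // Caveat: if the server applies Content-Encoding (gzip/br), the header
    // reports the compressed size while the reader yields decompressed bytes,
    // so the ratio below is only approximate. Large binary .onnx files are
    // typically served uncompressed, in which case it is exact.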

    const reader = response.body.getReader();
    let received = 0;
    const chunks = [];

    while(true) {
        const { done, value } = await reader.read();
        if (done) break;
        chunks.push(value);
        received += value.length;
        onProgress(received / contentLength);
    }

    const blob = new Blob(chunks);
    return blob;
}

async function runPrompt(prompt) {
    const progressBar = document.getElementById("progress");
    const progressText = document.getElementById("progressText");
    const outputElem = document.getElementById("output");

    // 1️⃣ Download model ONNX file manually
    const modelUrl = "https://huggingface.co/onnx-community/gemma-3-1b-it-ONNX/resolve/main/onnx/model_q4.onnx"; // example file
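    // Note: this manual fetch only drives the download progress bar; the
    // pipeline created below fetches and caches the model weights itself,
    // so the bytes downloaded here are not reused by the library.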
    console.log("%cDownloading model...", "color: orange; font-weight: bold;");
    outputElem.textContent = "Downloading model...\n";
    progressBar.style.width = "0%";
    progressText.textContent = "0%";

    await downloadWithProgress(modelUrl, (ratio) => {
        const pct = Math.floor(ratio * 100);
        progressBar.style.width = pct + "%";
        progressText.textContent = pct + "%";
    });

    console.log("%cModel downloaded!", "color: green; font-weight: bold;");
    outputElem.textContent += "Model downloaded.\n";

    // 2️⃣ Initialize generator using WebGPU
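    // Optional guard: warn early if the browser exposes no WebGPU adapter,
    // since the pipeline below is configured for the WebGPU backend.
    if (!("gpu" in navigator)) {
        outputElem.textContent += "Warning: WebGPU is not available in this browser.\n";
    }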
    outputElem.textContent += "Initializing pipeline...\n";
    const generator = await pipeline(
        "text-generation",
        "onnx-community/gemma-3-1b-it-ONNX",
        { dtype: "q4", device: "webgpu" } // 4-bit quantized weights on the WebGPU backend
    );
    outputElem.textContent += "Pipeline ready. Generating text...\n";

    // 3️⃣ Stream output through a TextStreamer passed in the generation options
    const messages = [
        { role: "system", content: "You are a helpful assistant." },
        { role: "user", content: prompt },
    ];

    let generatedTokens = 0;
    const maxTokens = 512;

    const streamer = new TextStreamer(generator.tokenizer, {
        skip_prompt: true,
        skip_special_tokens: true,
        callback_function: (text) => {
            outputElem.textContent += text;
            outputElem.scrollTop = outputElem.scrollHeight;

            generatedTokens++;
            const progress = Math.min((generatedTokens / maxTokens) * 100, 100);
            progressBar.style.width = progress + "%";
            progressText.textContent = Math.floor(progress) + "%";
        },
    });

    await generator(messages, { max_new_tokens: maxTokens, do_sample: false, streamer });

    progressBar.style.width = "100%";
    progressText.textContent = "100%";
    console.log("%cGeneration complete!", "color: blue; font-weight: bold;");
}

window.onload = () => {
    const form = document.getElementById("promptForm");
    form.onsubmit = (e) => {
        e.preventDefault();
        const prompt = document.getElementById("promptInput").value;
        document.getElementById("output").textContent = "";
        runPrompt(prompt).catch((err) => {
            document.getElementById("output").textContent += "\nError: " + err.message;
        });
    }
};
</script>
<style>
body { font-family: 'Segoe UI', sans-serif; background: #1e1e2f; color: #f0f0f0; display: flex; flex-direction: column; align-items: center; padding: 2rem; }
h1 { color: #ffcc00; margin-bottom: 1rem; }
#promptForm { display: flex; gap: 0.5rem; margin-bottom: 1rem; }
#promptInput { padding: 0.5rem; width: 400px; border-radius: 5px; border: none; font-size: 1rem; }
#generateBtn { padding: 0.5rem 1rem; background: #ffcc00; border: none; border-radius: 5px; font-weight: bold; cursor: pointer; color: #1e1e2f; }
#generateBtn:hover { background: #ffdd33; }
#progressContainer { width: 400px; height: 20px; background: #333; border-radius: 5px; overflow: hidden; margin-bottom: 1rem; position: relative; }
#progress { width: 0%; height: 100%; background: #ffcc00; transition: width 0.05s; }
#progressText { position: absolute; right: 10px; top: 0; bottom: 0; display: flex; align-items: center; justify-content: flex-end; font-size: 0.8rem; color: #1e1e2f; font-weight: bold; }
pre#output { width: 400px; background: #2e2e44; padding: 1rem; border-radius: 8px; white-space: pre-wrap; word-wrap: break-word; min-height: 150px; overflow-y: auto; }
</style>
</head>
<body>
<h1>ONNX WebGPU Streaming Text Generation</h1>
<form id="promptForm">
    <input id="promptInput" type="text" placeholder="Type your prompt..." required />
    <button id="generateBtn">Generate</button>
</form>
<div id="progressContainer">
    <div id="progress"></div>
    <div id="progressText">0%</div>
</div>
<pre id="output">Enter a prompt and hit Generate...</pre>
</body>
</html>