import * as webllm from "https://esm.run/@mlc-ai/web-llm";

// Global variables and configuration
let selectedModel = "Phi-3.5-vision-instruct-q4f16_1-MLC";
let uploadedBase64Image = "";
let modelInitialized = false;

// Callback function to update initialization progress
function initProgressCallback(report) {
  document.getElementById("download-status").textContent = report.text;
  document.getElementById("download-status").classList.remove("hidden");
}

// Function to append messages to the chat box
function appendMessage(message, role = "user") {
  const chatBox = document.getElementById("chat-box");
  const container = document.createElement("div");
  container.classList.add("message-container", role);

  const newMessage = document.createElement("div");
  newMessage.classList.add("message");
  newMessage.textContent = message;

  container.appendChild(newMessage);
  chatBox.appendChild(container);
  chatBox.scrollTop = chatBox.scrollHeight; // Scroll to the latest message
}

// Function to update the last message
function updateLastMessage(content) {
  const messageDoms = document.querySelectorAll(".message");
  const lastMessageDom = messageDoms[messageDoms.length - 1];
  lastMessageDom.textContent = content;
}

// Function to check if both image and model are ready and enable the Send button
function checkIfReadyToSend() {
  if (uploadedBase64Image && modelInitialized) {
    document.getElementById("send").disabled = false;
  }
}

// Main function to initialize the engine and process images
async function main() {
  if (!uploadedBase64Image) {
    alert("Please upload an image first!");
    return;
  }

  // Initialize the engine configuration
  const engineConfig = {
    initProgressCallback: initProgressCallback,
    logLevel: "INFO",
  };
  const chatOpts = {
    context_window_size: 6144,
  };

  // Create the engine
  const engine = await webllm.CreateMLCEngine(selectedModel, engineConfig, chatOpts);

  // Indicate that the model is initialized
  modelInitialized = true;
  checkIfReadyToSend(); // Check if we can enable the Send button now

  // Construct chat messages with the uploaded image
  const messages = [
    {
      role: "user",
      content: [
        { type: "text", text: "Describe the uploaded image." },
        { type: "image_url", image_url: { url: uploadedBase64Image } },
      ],
    },
  ];

  // Send the chat request
  const request = { stream: false, messages: messages };
  const reply = await engine.chat.completions.create(request);

  // Get the reply and display it
  const replyMessage = await engine.getMessage();
  appendMessage(replyMessage, "assistant");
  document.getElementById("send").disabled = false;
  console.log(reply);
}

// Handle file uploads
document.getElementById("image-input").addEventListener("change", async function (event) {
  const file = event.target.files[0];
  if (file) {
    // imageFileToBase64 is defined at the end of this file
    uploadedBase64Image = await imageFileToBase64(file);
    console.log("Image uploaded and converted to base64");
    checkIfReadyToSend(); // Check if we can enable the Send button now
  }
});

// Set up UI bindings and event listeners
document.getElementById("download").addEventListener("click", async function () {
  selectedModel = document.getElementById("model-selection").value;
  await main(); // Initialize and run the model
});

document.getElementById("send").addEventListener("click", function () {
  const input = document.getElementById("user-input").value.trim();
  if (input.length === 0) return;
  appendMessage(input, "user");
  document.getElementById("user-input").value = "";
  document.getElementById("user-input").setAttribute("placeholder", "Generating...");
  // Additional logic for new user questions can be added here
});

// Populate model selection dropdown
const availableModels = [
  "Phi-3.5-vision-instruct-q4f16_1-MLC",
  "Phi-3.5-vision-instruct-q4f32_1-MLC",
];
availableModels.forEach((modelId) => {
  const option = document.createElement("option");
  option.value = modelId;
  option.textContent = modelId;
  document.getElementById("model-selection").appendChild(option);
});
document.getElementById("model-selection").value = selectedModel;
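
// The image-upload handler above calls imageFileToBase64(), which the original
// snippet does not define. Below is a minimal sketch of such a helper, assuming
// a plain FileReader-based conversion to a base64 data URL (the format accepted
// by the image_url content part); adapt it if your app prepares images differently.
function imageFileToBase64(file) {
  return new Promise((resolve, reject) => {
    const reader = new FileReader();
    reader.onload = () => resolve(reader.result); // data URL, e.g. "data:image/png;base64,..."
    reader.onerror = () => reject(reader.error);
    reader.readAsDataURL(file);
  });
}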