// vision-test / vision_model.js
import * as webllm from "https://esm.run/@mlc-ai/web-llm";
// Global variables and configuration
let selectedModel = "Phi-3.5-vision-instruct-q4f16_1-MLC";
let uploadedBase64Image = "";
let modelInitialized = false;
// Callback function to update initialization progress
function initProgressCallback(report) {
  document.getElementById("download-status").textContent = report.text;
  document.getElementById("download-status").classList.remove("hidden");
}
// Function to append messages to the chat box
function appendMessage(message, role = "user") {
  const chatBox = document.getElementById("chat-box");
  const container = document.createElement("div");
  container.classList.add("message-container", role);
  const newMessage = document.createElement("div");
  newMessage.classList.add("message");
  newMessage.textContent = message;
  container.appendChild(newMessage);
  chatBox.appendChild(container);
  chatBox.scrollTop = chatBox.scrollHeight; // Scroll to the latest message
}
// Function to update the last message
function updateLastMessage(content) {
  const messageDoms = document.querySelectorAll(".message");
  if (messageDoms.length === 0) return; // Nothing to update yet
  const lastMessageDom = messageDoms[messageDoms.length - 1];
  lastMessageDom.textContent = content;
}
// Function to check if both image and model are ready and enable the Send button
function checkIfReadyToSend() {
  if (uploadedBase64Image && modelInitialized) {
    document.getElementById("send").disabled = false;
  }
}
// Main function to initialize the engine and process images
async function main() {
  if (!uploadedBase64Image) {
    alert("Please upload an image first!");
    return;
  }
  // Initialize the engine configuration
  const engineConfig = {
    initProgressCallback: initProgressCallback,
    logLevel: "INFO",
  };
  const chatOpts = {
    context_window_size: 6144,
  };
  // Create the engine
  const engine = await webllm.CreateMLCEngine(selectedModel, engineConfig, chatOpts);
  // Indicate that the model is initialized
  modelInitialized = true;
  checkIfReadyToSend(); // Check if we can enable the Send button now
  // Construct chat messages with the uploaded image
  const messages = [
    {
      role: "user",
      content: [
        { type: "text", text: "Describe the uploaded image." },
        { type: "image_url", image_url: { url: uploadedBase64Image } },
      ],
    },
  ];
  // Send the chat request
  const request = { stream: false, messages: messages };
  const reply = await engine.chat.completions.create(request);
  // Get the reply and display it
  const replyMessage = await engine.getMessage();
  appendMessage(replyMessage, "assistant");
  document.getElementById("send").disabled = false;
  console.log(reply);
}
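// The upload handler below calls imageFileToBase64(), which is not defined in this
// file (it may live in one of the other uploaded files). A minimal FileReader-based
// sketch is included here so the script is self-contained: it resolves to a data-URL
// string (e.g. "data:image/png;base64,..."), which is the format expected by the
// image_url message content above.
function imageFileToBase64(file) {
  return new Promise((resolve, reject) => {
    const reader = new FileReader();
    reader.onload = () => resolve(reader.result); // reader.result is a base64 data URL
    reader.onerror = () => reject(reader.error);
    reader.readAsDataURL(file);
  });
}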
// Handle file uploads
document.getElementById("image-input").addEventListener("change", async function(event) {
const file = event.target.files[0];
if (file) {
uploadedBase64Image = await imageFileToBase64(file);
console.log("Image uploaded and converted to base64");
checkIfReadyToSend(); // Check if we can enable the Send button now
}
});
// Set up UI bindings and event listeners
document.getElementById("download").addEventListener("click", async function () {
selectedModel = document.getElementById("model-selection").value;
await main(); // Initialize and run the model
});
document.getElementById("send").addEventListener("click", function () {
const input = document.getElementById("user-input").value.trim();
if (input.length === 0) return;
appendMessage(input, "user");
document.getElementById("user-input").value = "";
document.getElementById("user-input").setAttribute("placeholder", "Generating...");
// Additional logic for new user questions can be added here
});
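// Hypothetical sketch of how a follow-up question could be answered. It is not wired
// into the click handler above, and it assumes the MLCEngine created in main() is
// kept in a module-scope variable (instead of a local const) and passed in here.
async function askFollowUp(engine, question) {
  const reply = await engine.chat.completions.create({
    stream: false,
    messages: [{ role: "user", content: question }],
  });
  const answer = reply.choices[0].message.content;
  appendMessage(answer, "assistant");
  document.getElementById("user-input").setAttribute("placeholder", "Type your message...");
  return answer;
}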
// Populate model selection dropdown
const availableModels = ["Phi-3.5-vision-instruct-q4f16_1-MLC", "Phi-3.5-vision-instruct-q4f32_1-MLC"];
availableModels.forEach((modelId) => {
  const option = document.createElement("option");
  option.value = modelId;
  option.textContent = modelId;
  document.getElementById("model-selection").appendChild(option);
});
document.getElementById("model-selection").value = selectedModel;