// vision-test / vision_model.js
import * as webllm from "https://esm.run/@mlc-ai/web-llm";
// Global variables and configuration
let selectedModel = "Phi-3.5-vision-instruct-q4f16_1-MLC";
let uploadedBase64Image = "";
let modelInitialized = false;
// Callback function to update initialization progress
function initProgressCallback(report) {
  document.getElementById("download-status").textContent = report.text;
  document.getElementById("download-status").classList.remove("hidden");
}
// Function to append messages to the chat box
function appendMessage(message, role = "user") {
  const chatBox = document.getElementById("chat-box");
  const container = document.createElement("div");
  container.classList.add("message-container", role);
  const newMessage = document.createElement("div");
  newMessage.classList.add("message");
  newMessage.textContent = message;
  container.appendChild(newMessage);
  chatBox.appendChild(container);
  chatBox.scrollTop = chatBox.scrollHeight; // Scroll to the latest message
}
// Function to update the last message
function updateLastMessage(content) {
  const messageDoms = document.querySelectorAll(".message");
  if (messageDoms.length === 0) return; // Nothing to update yet
  const lastMessageDom = messageDoms[messageDoms.length - 1];
  lastMessageDom.textContent = content;
}
// Function to check if both image and model are ready and enable the Send button
function checkIfReadyToSend() {
  if (uploadedBase64Image && modelInitialized) {
    document.getElementById("send").disabled = false;
  }
}
// Main function to initialize the engine and process images
async function main() {
  if (!uploadedBase64Image) {
    alert("Please upload an image first!");
    return;
  }
  // Initialize the engine configuration
  const engineConfig = {
    initProgressCallback: initProgressCallback,
    logLevel: "INFO",
  };
  const chatOpts = {
    context_window_size: 6144,
  };
  // Create the engine
  const engine = await webllm.CreateMLCEngine(selectedModel, engineConfig, chatOpts);
  // Indicate that the model is initialized
  modelInitialized = true;
  checkIfReadyToSend(); // Check if we can enable the Send button now
  // Construct chat messages with the uploaded image
  const messages = [
    {
      role: "user",
      content: [
        { type: "text", text: "Describe the uploaded image." },
        { type: "image_url", image_url: { url: uploadedBase64Image } },
      ],
    },
  ];
  // Send the chat request
  const request = { stream: false, messages: messages };
  const reply = await engine.chat.completions.create(request);
  // Get the reply and display it
  const replyMessage = await engine.getMessage();
  appendMessage(replyMessage, "assistant");
  document.getElementById("send").disabled = false;
  console.log(reply);
}
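// The upload handler below calls imageFileToBase64(), which is not defined in this
// file (it may live in one of the other uploaded files). A minimal FileReader-based
// sketch is included here so the script is self-contained: it resolves to a data-URL
// string (e.g. "data:image/png;base64,..."), which is the format expected by the
// image_url message content above.
function imageFileToBase64(file) {
  return new Promise((resolve, reject) => {
    const reader = new FileReader();
    reader.onload = () => resolve(reader.result); // reader.result is a base64 data URL
    reader.onerror = () => reject(reader.error);
    reader.readAsDataURL(file);
  });
}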
// Handle file uploads
document.getElementById("image-input").addEventListener("change", async function(event) {
const file = event.target.files[0];
if (file) {
uploadedBase64Image = await imageFileToBase64(file);
console.log("Image uploaded and converted to base64");
checkIfReadyToSend(); // Check if we can enable the Send button now
}
});
// Set up UI bindings and event listeners
document.getElementById("download").addEventListener("click", async function () {
selectedModel = document.getElementById("model-selection").value;
await main(); // Initialize and run the model
});
document.getElementById("send").addEventListener("click", function () {
const input = document.getElementById("user-input").value.trim();
if (input.length === 0) return;
appendMessage(input, "user");
document.getElementById("user-input").value = "";
document.getElementById("user-input").setAttribute("placeholder", "Generating...");
// Additional logic for new user questions can be added here
});
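// Hypothetical sketch of how a follow-up question could be answered. It is not wired
// into the click handler above, and it assumes the MLCEngine created in main() is
// kept in a module-scope variable (instead of a local const) and passed in here.
async function askFollowUp(engine, question) {
  const reply = await engine.chat.completions.create({
    stream: false,
    messages: [{ role: "user", content: question }],
  });
  const answer = reply.choices[0].message.content;
  appendMessage(answer, "assistant");
  document.getElementById("user-input").setAttribute("placeholder", "Type your message...");
  return answer;
}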
// Populate model selection dropdown
const availableModels = ["Phi-3.5-vision-instruct-q4f16_1-MLC", "Phi-3.5-vision-instruct-q4f32_1-MLC"];
availableModels.forEach((modelId) => {
  const option = document.createElement("option");
  option.value = modelId;
  option.textContent = modelId;
  document.getElementById("model-selection").appendChild(option);
});
document.getElementById("model-selection").value = selectedModel;