<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <!-- Needed so Bootstrap's responsive layout scales correctly on mobile devices -->
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <title>Image to Text - Hugging Face Transformers.js</title>
  <script type="module">
    // Import the pipeline factory from the Transformers.js CDN build.
    // NOTE: each statement must sit on its own line; a `//` comment sharing a
    // line with code would comment that code out.
    import { pipeline } from 'https://cdn.jsdelivr.net/npm/@xenova/transformers@2.5.4';

    // Expose it on `window` so non-module scripts on this page can call it.
    window.pipeline = pipeline;
  </script>
  <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.1.0/dist/css/bootstrap.min.css" rel="stylesheet">
  <link rel="stylesheet" href="css/styles.css">
</head>
<body>
  <div class="container-main">
    <!-- Page Header -->
    <div class="header">
      <div class="header-logo">
        <img src="images/logo.png" alt="logo">
      </div>
      <div class="header-main-text">
        <h1>Hugging Face Transformers.js</h1>
      </div>
      <div class="header-sub-text">
        <h3>Free AI Models for JavaScript Web Development</h3>
      </div>
    </div>
    <hr> <!-- Separator -->
    <!-- Back to Home button -->
    <div class="row mt-5">
      <div class="col-md-12 text-center">
        <a href="index.html" class="btn btn-outline-secondary" style="color: #3c650b; border-color: #3c650b;">Back to Main Page</a>
      </div>
    </div>
    <!-- Content -->
    <div class="container mt-5">
      <!-- Centered Titles -->
      <div class="text-center">
        <h2>Computer Vision</h2>
        <h4>Image to Text</h4>
      </div>
      <!-- Actual Content of this page: caption an image referenced by URL -->
      <div id="image-to-text-container" class="container mt-4">
        <h5>Generate a Caption for an Image w/ Xenova/vit-gpt2-image-captioning:</h5>
        <div class="d-flex align-items-center">
          <label for="imageToTextURLText" class="mb-0 text-nowrap" style="margin-right: 15px;">Enter image to Caption URL:</label>
          <input type="text" class="form-control flex-grow-1" id="imageToTextURLText" value="https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/cats.jpg" placeholder="Enter image" style="margin-right: 15px; margin-left: 15px;">
          <button id="ImagetoTextButton" class="btn btn-primary" onclick="captionImage()">Caption</button>
        </div>
        <div class="mt-4">
          <h4>Output:</h4>
          <pre id="outputArea"></pre>
        </div>
      </div>
      <hr> <!-- Line Separator -->
      <!-- Local-image section: the attributes of this div continue on the next line -->
      <div
id="image-to-text-local-container" class="container mt-4">
        <!-- Caption an image chosen from the user's local disk -->
        <h5>Generate a Caption for a Local Image:</h5>
        <div class="d-flex align-items-center">
          <label for="imagetoTextLocalFile" class="mb-0 text-nowrap" style="margin-right: 15px;">Select Local Image:</label>
          <input type="file" id="imagetoTextLocalFile" accept="image/*">
          <button id="CaptionButtonLocal" class="btn btn-primary" onclick="captionImageLocal()">Caption</button>
        </div>
        <div class="mt-4">
          <h4>Output:</h4>
          <pre id="outputAreaLocal"></pre>
        </div>
      </div>
      <!-- Back to Home button -->
      <div class="row mt-5">
        <div class="col-md-12 text-center">
          <a href="index.html" class="btn btn-outline-secondary" style="color: #3c650b; border-color: #3c650b;">Back to Main Page</a>
        </div>
      </div>
    </div>
  </div>
  <script>
    // NOTE: keep every `//` comment on its own line; a line comment that shares
    // a line with code comments that code out.

    // Image-captioning pipeline; undefined until the model has finished loading.
    let captioner;

    // Pending or completed model load, shared so concurrent callers reuse it.
    let captionerLoad = null;

    /**
     * Load the image-to-text model once and cache it in `captioner`.
     *
     * Repeated calls reuse the same load. If a load fails, the cached promise
     * is cleared so a later call can try again.
     *
     * @returns {Promise<Function>} resolves to the captioning pipeline.
     */
    async function initializeModel() {
      if (!captionerLoad) {
        const loading = (async () => {
          captioner = await pipeline('image-to-text', 'Xenova/vit-gpt2-image-captioning');
          return captioner;
        })();
        captionerLoad = loading;
        // Attached after the assignment so a failure (even a synchronous one,
        // e.g. `pipeline` missing) reliably resets the cache for a retry.
        loading.catch(() => {
          if (captionerLoad === loading) {
            captionerLoad = null;
          }
        });
      }
      return captionerLoad;
    }

    /**
     * Run the captioner on an image source and write the outcome to the page.
     *
     * Waits for the model if it is still loading, so clicking a button early
     * no longer fails with "captioner is not a function".
     *
     * @param {string} imageSource URL (http(s) or blob:) of the image.
     * @param {string} outputId id of the element that receives the result text.
     */
    async function runCaptioner(imageSource, outputId) {
      const output = document.getElementById(outputId);
      output.innerText = 'Generating caption...';
      try {
        const model = await initializeModel();
        const result = await model(imageSource);
        output.innerText = JSON.stringify(result, null, 2);
      } catch (error) {
        console.error(error);
        const reason = error && error.message ? error.message : String(error);
        output.innerText = 'Could not caption the image: ' + reason;
      }
    }

    /**
     * Caption the image whose URL is typed into #imageToTextURLText and show
     * the JSON result in #outputArea.
     */
    async function captionImage() {
      const imageUrl = document.getElementById("imageToTextURLText").value.trim();
      if (!imageUrl) {
        alert('Please enter an image URL first.');
        return;
      }
      await runCaptioner(imageUrl, "outputArea");
    }

    /**
     * Caption the file selected in #imagetoTextLocalFile and show the JSON
     * result in #outputAreaLocal.
     */
    async function captionImageLocal() {
      const fileInput = document.getElementById("imagetoTextLocalFile");
      const file = fileInput.files[0];
      if (!file) {
        alert('Please select an image file first.');
        return;
      }
      // Create a temporary blob: URL for the file.
      const url = URL.createObjectURL(file);
      try {
        await runCaptioner(url, "outputAreaLocal");
      } finally {
        // Release the blob URL once captioning has finished (or failed);
        // otherwise every click leaks one object URL.
        URL.revokeObjectURL(url);
      }
    }

    // Start loading the model once the DOM has been parsed; log a failure
    // instead of leaving an unhandled promise rejection.
    window.addEventListener("DOMContentLoaded", () => {
      initializeModel().catch((error) => {
        console.error('Failed to load the image captioning model:', error);
      });
    });
  </script>
</body>
</html>