// Source: loracaptionertaz / App.tsx
// Uploaded by: comfyuiman
// Commit message: Upload 15 files
// Commit: b06d05b (verified)
import React, { useState, useCallback, useMemo, useEffect, useRef } from 'react';
import type { MediaFile } from './types';
import { GenerationStatus } from './types';
import FileUploader from './components/FileUploader';
import MediaItem from './components/MediaItem';
import { generateCaption, checkCaptionQuality } from './services/geminiService';
import { generateCaptionQwen, checkQualityQwen } from './services/qwenService';
import { DownloadIcon, SparklesIcon, WandIcon, CheckCircleIcon, LoaderIcon, TrashIcon, AlertTriangleIcon, CopyIcon } from './components/Icons';
// Ambient declaration of `process` so that `process.env` type-checks even when
// @types/node is not resolved by the compiler ('Cannot find name process').
// This is a type-level declaration only; it does not create a runtime object.
declare const process: {
env: {
// Optional key; read below via `process.env.API_KEY || ''`.
API_KEY?: string;
// Any other environment entry is an optional string.
[key: string]: string | undefined;
}
};
// Global augmentation describing the browser globals this file reads off `window`.
declare global {
// Key-selection bridge exposed as `window.aistudio`; both methods are asynchronous.
interface AIStudio {
hasSelectedApiKey: () => Promise<boolean>;
openSelectKey: () => Promise<void>;
}
interface Window {
// Untyped JSZip constructor, used as `new window.JSZip()`.
// NOTE(review): presumably injected by a script tag — confirm against index.html.
JSZip: any;
// Optional: callers check for its presence before calling into it.
aistudio?: AIStudio;
}
}
// A unit of work in the request queue, discriminated by `type`:
// - 'generate': caption the media item `id`, optionally with extra instructions.
// - 'quality': score the existing caption of media item `id`.
type QueueRequest =
| { type: 'generate'; id: string; customInstructions?: string }
| { type: 'quality'; id: string };
// Which captioning backend is active.
type ApiProvider = 'gemini' | 'qwen';
// Target operating system for the generated setup script and start command.
type OSType = 'windows' | 'linux';
// Default text for the bulk generation instructions field. The sentences are
// kept verbatim (including their informal spelling) because the text is sent
// to the model as-is; they are joined with single spaces.
const DEFAULT_BULK_INSTRUCTIONS = [
  'Dont use ambiguous language "perhaps" for example.',
  "Also dont mention the character's name beyond the character tag at the end of the prompt.",
  'So don\'t say "raven with her cloak" but rather say "a woman with her cloak" for example etc.',
  'Refrain from using phrases like "character, female, male, figure of" etc just stick with consistent terminology: "woman, girl, boy, man" etc.',
  'Also do not caption any of the clothing in the image.',
].join(' ');
// Preset Qwen vision-language models offered in the "Local Model" dropdown.
// `id` is the Hugging Face model id passed to the server; `name` is the label
// shown to the user. The first entry is the default selection.
const QWEN_MODELS: { id: string; name: string }[] = [
  {
    id: 'thesby/Qwen3-VL-8B-NSFW-Caption-V4.5',
    name: 'Thesby Qwen 3 VL 8B NSFW Caption V4.5',
  },
  {
    id: 'huihui-ai/Huihui-Qwen3-VL-8B-Instruct-abliterated',
    name: 'Huihui Qwen 3 VL 8B Abliterated (Uncensored)',
  },
  {
    id: 'Qwen/Qwen3-VL-8B-Instruct-FP8',
    name: 'Qwen 3 VL 8B FP8 (Latest - High Efficiency)',
  },
  {
    id: 'Qwen/Qwen2.5-VL-7B-Instruct',
    name: 'Qwen 2.5 VL 7B (Recommended - 16GB+ VRAM)',
  },
  {
    id: 'Qwen/Qwen2.5-VL-3B-Instruct',
    name: 'Qwen 2.5 VL 3B (Fast / Lower VRAM)',
  },
  {
    id: 'Qwen/Qwen2-VL-7B-Instruct',
    name: 'Qwen 2 VL 7B (Stable)',
  },
  {
    id: 'Qwen/Qwen2-VL-2B-Instruct',
    name: 'Qwen 2 VL 2B (Lightweight)',
  },
];
const App: React.FC = () => {
// Uploaded media items and the trigger word used when captioning them.
const [mediaFiles, setMediaFiles] = useState<MediaFile[]>([]);
const [triggerWord, setTriggerWord] = useState('MyStyle');

// Which backend produces captions.
const [apiProvider, setApiProvider] = useState<ApiProvider>('gemini');

// Gemini: key from the environment (captured once), key typed by the user,
// and whether a key has been chosen through the AI Studio bridge.
const [envApiKey] = useState(process.env.API_KEY || '');
const [manualApiKey, setManualApiKey] = useState('');
const [isAiStudioKey, setIsAiStudioKey] = useState(false);

// Qwen: endpoint and model selection (preset or custom Hugging Face id).
const [qwenEndpoint, setQwenEndpoint] = useState('');
const [qwenModel, setQwenModel] = useState(QWEN_MODELS[0].id);
const [useCustomQwenModel, setUseCustomQwenModel] = useState(false);
const [customQwenModelId, setCustomQwenModelId] = useState('');

// Qwen: local install options. OS and default path are guessed once from the
// browser's user-agent string.
const [osType, setOsType] = useState<OSType>(() => {
  const onWindows = navigator.userAgent.includes("Windows");
  return onWindows ? 'windows' : 'linux';
});
const [localInstallPath, setLocalInstallPath] = useState(() => {
  const onWindows = navigator.userAgent.includes("Windows");
  return onWindows ? 'C:\\AI\\QwenLocal' : '/home/user/ai/qwen_local';
});

// Qwen: sampling and server launch options.
const [qwenVideoFrameCount, setQwenVideoFrameCount] = useState(8);
const [qwenMaxModelLen, setQwenMaxModelLen] = useState(8192);
const [qwen8BitMode, setQwen8BitMode] = useState(false);
const [qwenEnforceEager, setQwenEnforceEager] = useState(false);

// True when the page itself is served over HTTPS (set by the mount effect).
const [isHttps, setIsHttps] = useState(false);
// Whether the currently selected provider has enough configuration to run.
// - Gemini: any of an environment key, a manually entered key, or an AI Studio selection.
// - Qwen: an endpoint is required, and a non-blank model id when "custom model" is on.
// NOTE(review): an AI Studio selection alone satisfies this check, but
// `activeGeminiKey` only considers the env/manual keys — confirm that is intended.
const hasValidConfig = useMemo(() => {
  if (apiProvider !== 'gemini') {
    const endpointProvided = Boolean(qwenEndpoint);
    const customModelOk = !useCustomQwenModel || customQwenModelId.trim() !== '';
    return endpointProvided && customModelOk;
  }
  return Boolean(envApiKey) || Boolean(manualApiKey) || isAiStudioKey;
}, [apiProvider, envApiKey, manualApiKey, isAiStudioKey, qwenEndpoint, useCustomQwenModel, customQwenModelId]);
// The Gemini key to send with requests: the environment key wins over the
// manually entered one; an empty string means no key is available.
const activeGeminiKey = useMemo(
  () => envApiKey || manualApiKey || '',
  [envApiKey, manualApiKey]
);
// The Qwen model id in effect: the trimmed custom id when "custom model" is
// enabled (may be an empty string), otherwise the preset chosen in the dropdown.
const activeQwenModel = useMemo(() => {
  if (useCustomQwenModel) {
    return customQwenModelId.trim();
  }
  return qwenModel;
}, [useCustomQwenModel, customQwenModelId, qwenModel]);
// Instruction text: `bulkInstructions` feeds bulk refinement, while
// `bulkGenerationInstructions` is merged into every generation request.
const [bulkInstructions, setBulkInstructions] = useState('');
const [bulkGenerationInstructions, setBulkGenerationInstructions] = useState(DEFAULT_BULK_INSTRUCTIONS);

// Presentation and dataset naming options.
const [autofitTextareas, setAutofitTextareas] = useState(false);
const [datasetPrefix, setDatasetPrefix] = useState('item');

// Character tagging options forwarded to the caption services.
const [isCharacterTaggingEnabled, setIsCharacterTaggingEnabled] = useState(false);
const [characterShowName, setCharacterShowName] = useState('');

// Queueing system state: pending requests, an in-flight flag, a running count
// of finished requests, and the rate/batch limits applied by the queue effect.
const [isQueueEnabled, setIsQueueEnabled] = useState(false);
const [requestQueue, setRequestQueue] = useState<QueueRequest[]>([]);
const [isProcessingQueueItem, setIsProcessingQueueItem] = useState(false);
const [completedQueueCount, setCompletedQueueCount] = useState(0);
const [rpmLimit, setRpmLimit] = useState(150);
const [batchSize, setBatchSize] = useState(1);
// On mount: record whether the page is served over HTTPS and, if no endpoint
// has been set yet, pick a default. Over HTTPS the endpoint stays empty (the
// user must supply a tunnel URL); otherwise it defaults to the local server.
useEffect(() => {
  const secure = window.location.protocol === 'https:';
  setIsHttps(secure);
  if (qwenEndpoint) {
    return;
  }
  setQwenEndpoint(secure ? '' : 'http://localhost:8000/v1');
}, []);
// Ask the AI Studio bridge (when present) whether a key has already been
// selected. Skipped entirely when an environment key exists.
// A failing bridge call is logged instead of surfacing as an unhandled
// rejection, and the result is ignored if the component unmounted (or the
// effect re-ran) before the call resolved.
useEffect(() => {
  let cancelled = false;
  const checkKey = async () => {
    if (envApiKey) return;
    if (window.aistudio && typeof window.aistudio.hasSelectedApiKey === 'function') {
      try {
        const keySelected = await window.aistudio.hasSelectedApiKey();
        if (!cancelled) {
          setIsAiStudioKey(keySelected);
        }
      } catch (e) {
        console.error("Error checking API key selection:", e);
      }
    }
  };
  void checkKey();
  return () => {
    cancelled = true;
  };
}, [envApiKey]);
// Handler for the "Select" key button.
// - With an environment key: just tell the user and stop.
// - Without the AI Studio bridge: move focus to the manual key input, if rendered.
// - Otherwise: open the AI Studio key picker and mark a key as selected once
//   the call resolves; failures are logged.
const handleSelectKey = async () => {
  if (envApiKey) {
    alert("API Key is configured via environment variables.");
    return;
  }
  const studio = window.aistudio;
  if (!studio || typeof studio.openSelectKey !== 'function') {
    // Focus the manual input if available
    document.getElementById('manual-api-key')?.focus();
    return;
  }
  try {
    await studio.openSelectKey();
    setIsAiStudioKey(true);
  } catch (e) {
    console.error("Error opening API key selection:", e);
  }
};
// One-line shell command that activates the virtual environment inside the
// install directory and launches the vLLM OpenAI-compatible server with the
// currently selected options. Shape depends on the chosen OS.
const startCommand = useMemo(() => {
  // Fallback model is for display only, when no model id is available.
  const model = activeQwenModel || "Qwen/Qwen2.5-VL-7B-Instruct";
  // Drop a single trailing slash/backslash from the install directory.
  const installDir = localInstallPath.replace(/[\\/]$/, '');

  const flagParts = [
    `--model ${model}`,
    '--gpu-memory-utilization 0.95',
    `--max-model-len ${qwenMaxModelLen}`,
    '--port 8000',
    '--trust-remote-code',
  ];
  if (qwen8BitMode) {
    flagParts.push('--quantization bitsandbytes', '--load-format bitsandbytes');
  }
  if (qwenEnforceEager) {
    flagParts.push('--enforce-eager');
  }
  const flags = flagParts.join(' ');

  return osType === 'windows'
    ? `cd /d "${installDir}" && call venv\\Scripts\\activate && python -m vllm.entrypoints.openai.api_server ${flags}`
    : `cd "${installDir}" && source venv/bin/activate && python3 -m vllm.entrypoints.openai.api_server ${flags}`;
}, [localInstallPath, activeQwenModel, qwenMaxModelLen, osType, qwen8BitMode, qwenEnforceEager]);
// Copy the start command to the clipboard.
// `navigator.clipboard` is only available in secure contexts and `writeText`
// can reject (e.g. permission denied), so both cases are handled and logged
// rather than throwing or leaving an unhandled rejection. The command field
// remains selectable, so the user can still copy it manually.
const copyStartCommand = () => {
  if (!navigator.clipboard) {
    console.error("Clipboard API unavailable; select the command and copy it manually.");
    return;
  }
  navigator.clipboard.writeText(startCommand).catch((e: unknown) => {
    console.error("Failed to copy start command:", e);
  });
};
// Build a setup script for the selected OS (Windows batch or Linux bash) and
// trigger a browser download of it. The script creates a virtual environment
// in the install directory, installs vLLM (plus bitsandbytes in 8-bit mode)
// and prints the command that starts the server with the selected options.
//
// Changes from the previous version:
// - The "no model" alert was removed: `modelToUse` always falls back to a
//   default id, so that branch could never run.
// - The temporary blob URL is revoked after the download is triggered so the
//   script text is not kept alive for the lifetime of the page.
const generateInstallScript = () => {
  const isWindows = osType === 'windows';
  // Falls back to a default model when no model id is available.
  const modelToUse = activeQwenModel || "Qwen/Qwen2.5-VL-7B-Instruct";
  // Construct flags for the echo message in script
  let launchFlags = `--model ${modelToUse} --gpu-memory-utilization 0.95 --max-model-len ${qwenMaxModelLen} --port 8000 --trust-remote-code`;
  if (qwen8BitMode) {
    launchFlags += " --quantization bitsandbytes --load-format bitsandbytes";
  }
  if (qwenEnforceEager) {
    launchFlags += " --enforce-eager";
  }
  // Install dependencies command
  const pipInstallCmd = qwen8BitMode
    ? "pip install vllm bitsandbytes"
    : "pip install vllm";
  // Commands for vllm setup. The template contents are emitted verbatim into
  // the downloaded file, so their lines must stay unindented.
  const installCmds = isWindows ?
`
@echo off
echo ===================================================
echo LoRA Caption Assistant - Local Qwen Setup
echo ===================================================
echo.
echo This script will set up a Python virtual environment and install vLLM.
echo Ensure you have Python 3.10+ and CUDA toolkit installed.
echo.
echo SELECTED MODEL: ${modelToUse}
echo MAX CONTEXT: ${qwenMaxModelLen}
echo 8-BIT MODE: ${qwen8BitMode ? 'ON' : 'OFF'}
echo EAGER MODE: ${qwenEnforceEager ? 'ON' : 'OFF'}
echo.
pause
mkdir "${localInstallPath}"
cd /d "${localInstallPath}"
echo Creating virtual environment...
python -m venv venv
call venv\\Scripts\\activate
echo Installing dependencies...
${pipInstallCmd}
echo.
echo ===================================================
echo Setup Complete!
echo ===================================================
echo.
echo To start the server, run this command:
echo python -m vllm.entrypoints.openai.api_server ${launchFlags}
echo.
echo ---------------------------------------------------
echo IMPORTANT FOR HUGGING FACE SPACES USERS (HTTPS):
echo ---------------------------------------------------
echo Because the Web App is running on HTTPS, it cannot connect to http://localhost directly.
echo You must create a secure tunnel.
echo.
echo OPTION 1: Cloudflare Tunnel (Recommended - No account needed)
echo 1. Download 'cloudflared'
echo 2. Run: cloudflared tunnel --url http://localhost:8000
echo 3. Copy the 'https://....trycloudflare.com' URL into the App Endpoint field.
echo.
echo OPTION 2: Ngrok
echo 1. Run: ngrok http 8000
echo 2. Copy the 'https://....ngrok-free.app' URL into the App Endpoint field.
echo.
pause
` :
`#!/bin/bash
echo "==================================================="
echo " LoRA Caption Assistant - NO MAIN DRIVE USAGE"
echo "==================================================="
# --- CONFIGURATION ---
BASE_DIR="${localInstallPath}"
mkdir -p "$BASE_DIR"
# 1. Redirect Temporary Files
export TMPDIR="$BASE_DIR/tmp"
mkdir -p "$TMPDIR"
# 2. Redirect Pip Download Cache
export PIP_CACHE_DIR="$BASE_DIR/pip_cache"
mkdir -p "$PIP_CACHE_DIR"
# 3. Redirect Hugging Face Models
export HF_HOME="$BASE_DIR/hf_cache"
mkdir -p "$HF_HOME"
echo "Files, Cache, and Temp set to: $BASE_DIR"
cd "$BASE_DIR"
# --- VIRTUAL ENV SETUP ---
if [ ! -d "venv" ]; then
echo "Creating virtual environment..."
python3 -m venv venv
# Write variables into the activation script PERMANENTLY
echo "export TMPDIR=\\"$TMPDIR\\"" >> venv/bin/activate
echo "export PIP_CACHE_DIR=\\"$PIP_CACHE_DIR\\"" >> venv/bin/activate
echo "export HF_HOME=\\"$HF_HOME\\"" >> venv/bin/activate
fi
# Activate now
source venv/bin/activate
# --- INSTALL ---
echo "Installing dependencies..."
${pipInstallCmd}
echo ""
echo "==================================================="
echo " Setup Complete!"
echo "==================================================="
echo "You are now inside the environment."
echo "To start the server, just copy-paste this line:"
echo ""
echo "python3 -m vllm.entrypoints.openai.api_server ${launchFlags}"
echo ""
echo "==================================================="
# --- THE MAGIC TRICK ---
# This command replaces the script process with a new interactive shell.
# It prevents the terminal from "closing" or resetting.
# You will stay inside the venv with all your active paths.
exec bash
`;
  const blob = new Blob([installCmds], { type: 'text/plain' });
  const url = URL.createObjectURL(blob);
  const link = document.createElement('a');
  link.href = url;
  link.download = isWindows ? 'install_qwen_local.bat' : 'install_qwen_local.sh';
  document.body.appendChild(link);
  link.click();
  document.body.removeChild(link);
  // Release the blob once the click has been dispatched.
  setTimeout(() => URL.revokeObjectURL(url), 0);
};
// Shallow-merge `updates` into the media item whose id matches; every other
// item is returned untouched. An unknown id leaves the list contents unchanged.
const updateFile = useCallback((id: string, updates: Partial<MediaFile>) => {
  setMediaFiles(prev =>
    prev.map(entry => {
      if (entry.id !== id) {
        return entry;
      }
      return { ...entry, ...updates };
    })
  );
}, []);
// Turn dropped/selected files into media items and append them to the list.
// Images and videos (by MIME type) become items; `.txt` files are treated as
// caption sidecars and matched to media by case-insensitive base name. An
// item with a readable sidecar starts as SUCCESS with that caption; an
// unreadable sidecar yields ERROR; everything else starts IDLE.
const handleFilesAdded = useCallback(async (files: File[]) => {
  // Everything before the last dot ('' when the name contains no dot).
  const stemOf = (name: string) => name.substring(0, name.lastIndexOf('.'));

  const mediaUploads = files.filter(
    f => f.type.startsWith('image/') || f.type.startsWith('video/')
  );

  // Later sidecars with the same base name overwrite earlier ones.
  const captionsByStem = new Map<string, File>();
  for (const f of files) {
    if (f.name.toLowerCase().endsWith('.txt')) {
      captionsByStem.set(stemOf(f.name).toLowerCase(), f);
    }
  }

  const buildEntry = async (file: File): Promise<MediaFile> => {
    const id = `${file.name}-${file.lastModified}-${Math.random()}`;
    const sidecar = captionsByStem.get(stemOf(file.name).toLowerCase());

    let caption = '';
    let status = GenerationStatus.IDLE;
    let errorMessage: string | undefined = undefined;

    if (sidecar) {
      try {
        caption = await sidecar.text();
        status = GenerationStatus.SUCCESS;
      } catch (e) {
        console.error("Error reading caption file:", e);
        status = GenerationStatus.ERROR;
        errorMessage = "Failed to read caption file.";
      }
    }

    return {
      id,
      file,
      previewUrl: URL.createObjectURL(file),
      caption,
      status,
      isSelected: false,
      customInstructions: '',
      errorMessage
    };
  };

  const additions = await Promise.all(mediaUploads.map(file => buildEntry(file)));
  setMediaFiles(prev => [...prev, ...additions]);
}, []);
// Generate a caption for one media item with the active provider and store
// the outcome on the item.
// - Incomplete provider config or an empty trigger word marks the item ERROR.
// - An unknown id is ignored.
// - While running, the item is GENERATING and its previous error/score are cleared.
// - Any thrown error ends up in the item's `errorMessage`.
const _generateCaption = useCallback(async (id: string, customInstructions?: string) => {
  const target = mediaFiles.find(mf => mf.id === id);

  if (!hasValidConfig) {
    updateFile(id, { status: GenerationStatus.ERROR, errorMessage: 'Provider configuration incomplete. Check settings.' });
    return;
  }
  if (!triggerWord) {
    updateFile(id, { status: GenerationStatus.ERROR, errorMessage: 'Trigger word cannot be empty.' });
    return;
  }
  if (!target) {
    return;
  }

  // Dispatch to the selected provider; resolves to '' if neither matches.
  const requestCaption = async (): Promise<string> => {
    if (apiProvider === 'gemini') {
      if (!activeGeminiKey) throw new Error("Gemini API Key missing");
      return generateCaption(
        activeGeminiKey,
        target.file,
        triggerWord,
        customInstructions,
        isCharacterTaggingEnabled,
        characterShowName
      );
    }
    if (apiProvider === 'qwen') {
      if (!activeQwenModel) throw new Error("No Qwen Model selected");
      return generateCaptionQwen(
        '',
        qwenEndpoint,
        activeQwenModel,
        target.file,
        triggerWord,
        customInstructions,
        isCharacterTaggingEnabled,
        characterShowName,
        qwenVideoFrameCount
      );
    }
    return '';
  };

  updateFile(id, { status: GenerationStatus.GENERATING, errorMessage: undefined, qualityScore: undefined });
  try {
    const caption = await requestCaption();
    updateFile(id, { caption, status: GenerationStatus.SUCCESS });
  } catch (err) {
    const errorMessage = err instanceof Error ? err.message : 'An unknown error occurred.';
    updateFile(id, { status: GenerationStatus.ERROR, errorMessage });
  }
}, [mediaFiles, triggerWord, updateFile, isCharacterTaggingEnabled, characterShowName, apiProvider, activeGeminiKey, qwenEndpoint, activeQwenModel, hasValidConfig, qwenVideoFrameCount]);
// Score the existing caption of one media item with the active provider and
// store the score on the item.
// - Incomplete provider config marks the item ERROR.
// - An unknown id or a blank caption is ignored.
// - While running, the item is CHECKING; on failure the score is cleared and
//   the error message recorded.
const _checkQuality = useCallback(async (id: string) => {
  const target = mediaFiles.find(mf => mf.id === id);

  if (!hasValidConfig) {
    updateFile(id, { status: GenerationStatus.ERROR, errorMessage: 'Provider configuration incomplete.' });
    return;
  }
  if (!target) return;
  if (!target.caption.trim()) return;

  // Dispatch to the selected provider; resolves to 0 if neither matches.
  const requestScore = async (): Promise<number> => {
    if (apiProvider === 'gemini') {
      if (!activeGeminiKey) throw new Error("Gemini API Key missing");
      return checkCaptionQuality(activeGeminiKey, target.file, target.caption);
    }
    if (apiProvider === 'qwen') {
      if (!activeQwenModel) throw new Error("No Qwen Model selected");
      return checkQualityQwen(
        '',
        qwenEndpoint,
        activeQwenModel,
        target.file,
        target.caption,
        qwenVideoFrameCount
      );
    }
    return 0;
  };

  updateFile(id, { status: GenerationStatus.CHECKING, errorMessage: undefined });
  try {
    const score = await requestScore();
    updateFile(id, { status: GenerationStatus.SUCCESS, qualityScore: score });
  } catch (err) {
    const errorMessage = err instanceof Error ? err.message : 'An unknown error occurred during quality check.';
    updateFile(id, {
      status: GenerationStatus.ERROR,
      errorMessage,
      qualityScore: undefined,
    });
  }
}, [mediaFiles, updateFile, apiProvider, activeGeminiKey, qwenEndpoint, activeQwenModel, hasValidConfig, qwenVideoFrameCount]);
// Use a ref to hold the latest handlers to avoid stale closures in the queue processor.
// `_generateCaption` and `_checkQuality` are re-created whenever their
// dependencies change; the queue effect does not list them as dependencies
// and reads `handlersRef.current` at dispatch time instead.
const handlersRef = useRef({ _generateCaption, _checkQuality });
// Keep the ref pointing at the most recent handler instances.
// NOTE(review): this effect is declared before the queue effect, so it should
// run first within the same commit — keep that ordering.
useEffect(() => {
handlersRef.current = { _generateCaption, _checkQuality };
}, [_generateCaption, _checkQuality]);
// Effect to process the request queue in concurrent batches.
// When the queue is enabled, idle and non-empty, it takes up to `batchSize`
// requests from the front, runs them concurrently through the latest
// handlers, waits a delay derived from `rpmLimit`, and only then removes the
// batch from the queue and clears the in-flight flag (which re-triggers this
// effect for the next batch). When the queue drains, the completed counter
// is reset for the next run.
useEffect(() => {
  if (isQueueEnabled && !isProcessingQueueItem && requestQueue.length > 0) {
    setIsProcessingQueueItem(true);
    // A batch must contain at least one request. A batch size of 0, NaN or a
    // negative number would otherwise produce an empty (or wrong) batch, and
    // an empty batch makes this effect spin forever with zero delay because
    // nothing is ever removed from the queue.
    const effectiveBatchSize = Number.isFinite(batchSize) ? Math.max(1, Math.floor(batchSize)) : 1;
    const batch = requestQueue.slice(0, effectiveBatchSize);
    const processBatch = async () => {
      const promises = batch.map(request => {
        if (request.type === 'generate') {
          return handlersRef.current._generateCaption(request.id, request.customInstructions);
        } else if (request.type === 'quality') {
          return handlersRef.current._checkQuality(request.id);
        }
        return Promise.resolve(); // Should not happen in practice
      });
      await Promise.allSettled(promises);
    };
    // Spread the batch over the per-minute budget; fall back to 1s when no
    // positive limit is set.
    const delay = rpmLimit > 0 ? Math.ceil((batch.length * 60 * 1000) / rpmLimit) : 1000;
    processBatch().finally(() => {
      // Wait for the calculated delay before processing the next batch.
      setTimeout(() => {
        setCompletedQueueCount(prev => prev + batch.length);
        setRequestQueue(prev => prev.slice(batch.length));
        setIsProcessingQueueItem(false);
      }, delay);
    });
  } else if (!isProcessingQueueItem && requestQueue.length === 0 && completedQueueCount > 0) {
    // After the queue is empty, reset the completed count for the next run.
    setCompletedQueueCount(0);
  }
}, [isQueueEnabled, isProcessingQueueItem, requestQueue, rpmLimit, completedQueueCount, batchSize]);
// Request a caption for one item. The global generation instructions and the
// item-specific ones are both trimmed; when both are present they are joined
// as "<bulk>\n\n<item>", otherwise whichever is non-empty is used. The request
// is queued when the queue is enabled and run immediately otherwise.
const handleGenerateCaption = useCallback((id: string, itemCustomInstructions?: string) => {
  const globalPart = bulkGenerationInstructions.trim();
  const itemPart = itemCustomInstructions?.trim() || '';
  const combinedInstructions =
    globalPart && itemPart ? `${globalPart}\n\n${itemPart}` : globalPart || itemPart;

  if (!isQueueEnabled) {
    _generateCaption(id, combinedInstructions);
    return;
  }
  setRequestQueue(prev => [...prev, { type: 'generate', id, customInstructions: combinedInstructions }]);
}, [isQueueEnabled, _generateCaption, bulkGenerationInstructions]);
// Items currently ticked by the user.
const selectedFiles = useMemo(
  () => mediaFiles.filter(({ isSelected }) => isSelected),
  [mediaFiles]
);
// "Select all" checkbox: checked when every item is selected (and there is at
// least one), indeterminate when only some are.
const selectAllCheckboxRef = useRef<HTMLInputElement>(null);
const allSelected = useMemo(
  () => mediaFiles.length > 0 && !mediaFiles.some(mf => !mf.isSelected),
  [mediaFiles]
);
const someSelected = useMemo(
  () => !allSelected && mediaFiles.some(mf => mf.isSelected),
  [mediaFiles]
);
// `indeterminate` is a DOM property without a JSX attribute, so it is set
// imperatively through the ref.
useEffect(() => {
  const checkbox = selectAllCheckboxRef.current;
  if (checkbox) {
    checkbox.indeterminate = someSelected;
  }
}, [someSelected]);
// Apply the "select all" checkbox state to every media item.
const handleSelectAll = (e: React.ChangeEvent<HTMLInputElement>) => {
  const { checked } = e.target;
  setMediaFiles(prev =>
    prev.map(entry => ({ ...entry, isSelected: checked }))
  );
};
// Run a quality check on every selected item that has a non-blank caption.
// Alerts when there is nothing to check. Requests are queued when the queue
// is enabled; otherwise they all run concurrently and are awaited.
const handleCheckQuality = useCallback(async () => {
  const captioned = selectedFiles.filter(({ caption }) => caption.trim() !== '');
  if (captioned.length === 0) {
    alert("Please select files with captions to check quality.");
    return;
  }
  if (!isQueueEnabled) {
    await Promise.all(captioned.map(({ id }) => _checkQuality(id)));
    return;
  }
  const queued = captioned.map(({ id }) => ({ type: 'quality' as const, id }));
  setRequestQueue(prev => [...prev, ...queued]);
}, [selectedFiles, isQueueEnabled, _checkQuality]);
// Generate captions for every item that is still IDLE or previously failed.
// Each request carries the global generation instructions merged with the
// item's own ("<bulk>\n\n<item>" when both exist). Requests are queued when
// the queue is enabled, otherwise started immediately.
const handleGenerateAll = useCallback(() => {
  const pending = mediaFiles.filter(
    ({ status }) => status === GenerationStatus.IDLE || status === GenerationStatus.ERROR
  );
  if (pending.length === 0) return;

  const globalPart = bulkGenerationInstructions.trim();
  for (const { id, customInstructions } of pending) {
    const itemPart = customInstructions?.trim() || '';
    const combined =
      globalPart && itemPart ? `${globalPart}\n\n${itemPart}` : globalPart || itemPart;
    if (isQueueEnabled) {
      setRequestQueue(prev => [...prev, { type: 'generate', id, customInstructions: combined }]);
    } else {
      _generateCaption(id, combined);
    }
  }
}, [mediaFiles, bulkGenerationInstructions, isQueueEnabled, _generateCaption]);
// Generate captions for the selected items that are still IDLE or previously
// failed; alerts when none qualify. Instructions are merged the same way as
// for single-item generation ("<bulk>\n\n<item>" when both exist). Requests
// are queued when the queue is enabled, otherwise started immediately.
const handleGenerateSelected = useCallback(() => {
  const pending = selectedFiles.filter(
    ({ status }) => status === GenerationStatus.IDLE || status === GenerationStatus.ERROR
  );
  if (pending.length === 0) {
    alert("Please select items that haven't been generated yet.");
    return;
  }

  const globalPart = bulkGenerationInstructions.trim();
  for (const { id, customInstructions } of pending) {
    const itemPart = customInstructions?.trim() || '';
    const combined =
      globalPart && itemPart ? `${globalPart}\n\n${itemPart}` : globalPart || itemPart;
    if (isQueueEnabled) {
      setRequestQueue(prev => [...prev, { type: 'generate', id, customInstructions: combined }]);
    } else {
      _generateCaption(id, combined);
    }
  }
}, [selectedFiles, bulkGenerationInstructions, isQueueEnabled, _generateCaption]);
// Re-run caption generation for every selected item, passing the bulk
// refinement text as the item-level instructions. Alerts and does nothing
// when that text is blank.
const handleRefineSelected = useCallback(() => {
  const hasInstructions = bulkInstructions.trim() !== '';
  if (!hasInstructions) {
    alert('Please enter instructions for bulk refinement.');
    return;
  }
  for (const { id } of selectedFiles) {
    handleGenerateCaption(id, bulkInstructions);
  }
}, [selectedFiles, bulkInstructions, handleGenerateCaption]);
// Drop all pending requests and reset the progress counter.
const handleClearQueue = () => {
  setCompletedQueueCount(0);
  setRequestQueue([]);
};
// Per-item field handlers passed down to the media item components.

// Editing a caption invalidates any previously computed quality score.
const handleCaptionChange = useCallback((id: string, caption: string) => {
  const changes: Partial<MediaFile> = { caption, qualityScore: undefined };
  updateFile(id, changes);
}, [updateFile]);

// Store the item-specific instructions.
const handleCustomInstructionsChange = useCallback((id: string, instructions: string) => {
  const changes: Partial<MediaFile> = { customInstructions: instructions };
  updateFile(id, changes);
}, [updateFile]);

// Tick or untick a single item.
const handleSelectionChange = useCallback((id: string, isSelected: boolean) => {
  const changes: Partial<MediaFile> = { isSelected };
  updateFile(id, changes);
}, [updateFile]);
// Package the selected items into a zip and trigger a browser download.
// Each item is stored as "<prefix>_imageNN.<ext>" or "<prefix>_videoNN.<ext>"
// (counters start at 01, per media kind) next to a same-named ".txt" file
// holding its caption. Does nothing when no item is selected or JSZip is not
// available on `window`.
//
// Changes from the previous version:
// - Archive generation is awaited and failures are reported instead of being
//   left as an unhandled rejection.
// - The temporary object URL is revoked after the download is triggered.
// - A file name without a dot no longer uses the whole name as its extension.
// - Files that are neither image nor video get their own counter, so their
//   generated names cannot collide.
const handleDownload = useCallback(async () => {
  if (selectedFiles.length === 0 || !window.JSZip) return;
  const zip = new window.JSZip();
  let imageCounter = 0;
  let videoCounter = 0;
  let otherCounter = 0;

  for (const { file, caption } of selectedFiles) {
    const dotIndex = file.name.lastIndexOf('.');
    const extension = dotIndex >= 0 ? file.name.slice(dotIndex + 1) : '';

    let newBaseName: string;
    if (file.type.startsWith('image/')) {
      imageCounter++;
      newBaseName = `${datasetPrefix}_image${String(imageCounter).padStart(2, '0')}`;
    } else if (file.type.startsWith('video/')) {
      videoCounter++;
      newBaseName = `${datasetPrefix}_video${String(videoCounter).padStart(2, '0')}`;
    } else {
      otherCounter++;
      newBaseName = `${datasetPrefix}_file${String(otherCounter).padStart(2, '0')}`;
    }

    // Avoid a dangling "." when the original name has no extension.
    zip.file(extension ? `${newBaseName}.${extension}` : newBaseName, file);
    zip.file(`${newBaseName}.txt`, caption);
  }

  const zipFileName = datasetPrefix ? `${datasetPrefix}_dataset_${Date.now()}.zip` : `lora-dataset-${Date.now()}.zip`;
  try {
    const content: Blob = await zip.generateAsync({ type: 'blob' });
    const url = URL.createObjectURL(content);
    const link = document.createElement('a');
    link.href = url;
    link.download = zipFileName;
    document.body.appendChild(link);
    link.click();
    document.body.removeChild(link);
    // Release the archive blob once the click has been dispatched.
    setTimeout(() => URL.revokeObjectURL(url), 0);
  } catch (e) {
    console.error("Error generating dataset archive:", e);
    alert('Failed to create the dataset archive. See the console for details.');
  }
}, [selectedFiles, datasetPrefix]);
// Remove the selected items after the user confirms.
//
// Changes from the previous version:
// - The preview object URLs created when the files were added are revoked, so
//   deleted media is no longer kept alive by the page.
// - The list is updated with a functional update keyed on the confirmed ids,
//   so updates made to other items while the dialog was open (e.g. a caption
//   finishing) are not overwritten by a stale copy of the list.
const handleDeleteSelected = useCallback(() => {
  if (selectedFiles.length === 0) {
    return;
  }
  if (window.confirm(`Are you sure you want to delete ${selectedFiles.length} selected item(s)? This action cannot be undone.`)) {
    const doomedIds = new Set(selectedFiles.map(mf => mf.id));
    selectedFiles.forEach(mf => URL.revokeObjectURL(mf.previewUrl));
    setMediaFiles(prev => prev.filter(mf => !doomedIds.has(mf.id)));
  }
}, [selectedFiles]);
// Queue progress as a percentage: finished requests over finished + pending.
// Reported as 0 while there is nothing queued or completed.
const totalQueueItems = requestQueue.length + completedQueueCount;
const queueProgress = totalQueueItems <= 0
  ? 0
  : (completedQueueCount / totalQueueItems) * 100;
return (
<div className="min-h-screen container mx-auto p-4 sm:p-6 lg:p-8">
<header className="text-center mb-8">
<h1 className="text-4xl font-bold text-white tracking-tight">LoRA Caption Assistant</h1>
<p className="mt-2 text-lg text-gray-400">Generate high-quality captions for your training data.</p>
</header>
<div className="bg-yellow-500/10 border border-yellow-500/50 rounded-lg p-4 mb-8 text-yellow-200 text-sm space-y-2 max-w-3xl mx-auto">
<div className="flex items-start gap-3">
<AlertTriangleIcon className="w-6 h-6 flex-shrink-0 mt-0.5 text-yellow-500" />
<ul className="list-disc pl-4 space-y-1">
<li><strong>SFW ONLY:</strong> You may get your Google/API account banned if you upload NSFW content.</li>
<li>This tool requires an API KEY. Depending on your plan and usage, this can cost you money for each request.</li>
<li>Running in high batches (Request Queue) can increase costs for API usage rapidly. The recommended default is <strong>"1"</strong>.</li>
</ul>
</div>
</div>
<main className="space-y-8">
<section className="bg-gray-800/50 p-6 rounded-lg shadow-lg space-y-6">
<h2 className="text-xl font-semibold">1. Global Settings & Actions</h2>
<div className="grid grid-cols-1 md:grid-cols-2 gap-x-8 gap-y-4">
<div className="space-y-4">
<div>
<label className="block text-sm font-medium text-gray-300 mb-1">AI Provider</label>
<div className="grid grid-cols-2 gap-2 p-1 bg-gray-900 rounded-md">
<button
onClick={() => setApiProvider('gemini')}
className={`py-2 rounded text-sm font-medium transition-colors ${apiProvider === 'gemini' ? 'bg-indigo-600 text-white' : 'text-gray-400 hover:bg-gray-800'}`}
>
Google Gemini
</button>
<button
onClick={() => setApiProvider('qwen')}
className={`py-2 rounded text-sm font-medium transition-colors ${apiProvider === 'qwen' ? 'bg-indigo-600 text-white' : 'text-gray-400 hover:bg-gray-800'}`}
>
Local Qwen (GPU)
</button>
</div>
</div>
{/* Gemini Settings */}
{apiProvider === 'gemini' && (
<div className="space-y-1 animate-fade-in">
<label className="block text-sm font-medium text-gray-300">Google API Key</label>
{envApiKey ? (
<button
disabled
className="w-full p-2 rounded-md bg-green-900/50 text-green-200 border border-green-700 cursor-not-allowed"
>
API Key configured via Environment Variables ✓
</button>
) : (
<div className="flex gap-2">
<input
id="manual-api-key"
type="password"
value={manualApiKey}
onChange={(e) => setManualApiKey(e.target.value)}
placeholder="Paste Gemini API Key here"
className="flex-grow p-2 bg-gray-700 border border-gray-600 rounded-md focus:ring-2 focus:ring-indigo-500 focus:border-indigo-500"
/>
{window.aistudio && (
<button
onClick={handleSelectKey}
className="px-3 py-2 bg-gray-700 hover:bg-gray-600 rounded-md border border-gray-600 text-sm"
>
Select
</button>
)}
</div>
)}
<div className="flex flex-col gap-1 pt-1">
<a href="https://aistudio.google.com/api-keys" target="_blank" rel="noopener noreferrer" className="text-xs text-indigo-400 hover:text-indigo-300 underline w-fit">
Get your API Key here
</a>
</div>
</div>
)}
{/* Qwen Settings (Local Only) */}
{apiProvider === 'qwen' && (
<div className="space-y-3 animate-fade-in border-l-2 border-indigo-500 pl-3">
<div>
<label className="block text-sm font-medium text-gray-300 mb-1">Local Model</label>
<div className="flex items-center gap-2 mb-2">
<input
type="checkbox"
id="use-custom-model"
checked={useCustomQwenModel}
onChange={(e) => setUseCustomQwenModel(e.target.checked)}
className="h-4 w-4 rounded border-gray-500 bg-gray-700 text-indigo-500 focus:ring-indigo-600"
/>
<label htmlFor="use-custom-model" className="text-xs text-gray-400 cursor-pointer">
Use Custom Model ID (from Hugging Face)
</label>
</div>
{useCustomQwenModel ? (
<input
type="text"
value={customQwenModelId}
onChange={(e) => setCustomQwenModelId(e.target.value)}
placeholder="e.g. Qwen/Qwen2.5-VL-72B-Instruct"
className="w-full p-2 bg-gray-700 border border-gray-600 rounded-md focus:ring-2 focus:ring-indigo-500 focus:border-indigo-500 text-sm"
/>
) : (
<select
value={qwenModel}
onChange={(e) => setQwenModel(e.target.value)}
disabled={useCustomQwenModel}
className="w-full p-2 bg-gray-700 border border-gray-600 rounded-md focus:ring-2 focus:ring-indigo-500 focus:border-indigo-500 text-sm disabled:opacity-50"
>
{QWEN_MODELS.map(m => (
<option key={m.id} value={m.id}>{m.name}</option>
))}
</select>
)}
</div>
<div className="space-y-3 bg-gray-900/50 p-3 rounded-md">
<div className="flex justify-between items-center">
<div className="text-xs text-gray-400">
<strong>Installation:</strong> Select model & path.
</div>
<div className="flex gap-3 bg-gray-800 rounded p-1">
<label className="flex items-center space-x-1.5 cursor-pointer">
<input
type="radio"
name="osType"
checked={osType === 'windows'}
onChange={() => setOsType('windows')}
className="text-indigo-600 focus:ring-indigo-500 h-3 w-3 bg-gray-700 border-gray-500"
/>
<span className={`text-xs ${osType === 'windows' ? 'text-indigo-300' : 'text-gray-400'}`}>Windows</span>
</label>
<label className="flex items-center space-x-1.5 cursor-pointer">
<input
type="radio"
name="osType"
checked={osType === 'linux'}
onChange={() => setOsType('linux')}
className="text-indigo-600 focus:ring-indigo-500 h-3 w-3 bg-gray-700 border-gray-500"
/>
<span className={`text-xs ${osType === 'linux' ? 'text-indigo-300' : 'text-gray-400'}`}>Linux</span>
</label>
</div>
</div>
<div className="flex gap-3">
<div className="flex-grow">
<label className="block text-xs font-medium text-gray-400 mb-1">Install Directory</label>
<input
type="text"
value={localInstallPath}
onChange={(e) => setLocalInstallPath(e.target.value)}
className="w-full p-1.5 bg-gray-700 border border-gray-600 rounded-md text-sm"
/>
</div>
<div className="w-24 flex-shrink-0">
<label className="block text-xs font-medium text-gray-400 mb-1" title="Max Context Window">Max Tokens</label>
<input
type="number"
value={qwenMaxModelLen}
onChange={(e) => setQwenMaxModelLen(Number(e.target.value))}
className="w-full p-1.5 bg-gray-700 border border-gray-600 rounded-md text-sm text-center"
step="1024"
min="2048"
/>
</div>
</div>
<div className="flex flex-col gap-2 mt-1">
<label className="flex items-center space-x-2 cursor-pointer">
<input
type="checkbox"
checked={qwen8BitMode}
onChange={(e) => setQwen8BitMode(e.target.checked)}
className="h-3.5 w-3.5 rounded border-gray-500 bg-gray-700 text-indigo-500 focus:ring-indigo-600"
/>
<span className="text-xs text-gray-300">Enable 8-bit Quantization (bitsandbytes)</span>
</label>
<label className="flex items-center space-x-2 cursor-pointer">
<input
type="checkbox"
checked={qwenEnforceEager}
onChange={(e) => setQwenEnforceEager(e.target.checked)}
className="h-3.5 w-3.5 rounded border-gray-500 bg-gray-700 text-indigo-500 focus:ring-indigo-600"
/>
<span className="text-xs text-gray-300">Enforce Eager Mode</span>
</label>
</div>
<button
onClick={generateInstallScript}
className="w-full px-3 py-1.5 bg-green-700 hover:bg-green-600 rounded-md text-xs font-medium transition-colors"
>
Download Setup Script ({osType === 'windows' ? 'Batch' : 'Bash'})
</button>
<p className="text-[10px] text-gray-500">
Max Tokens sets --max-model-len. Default: 8192. Lower to 4096 if OOM.
</p>
<div className="mt-2 pt-2 border-t border-gray-700">
<label className="block text-[10px] font-medium text-gray-400 mb-1">
Already installed? Run this to start server:
</label>
<div className="flex gap-2">
<input
readOnly
value={startCommand}
className="w-full p-1.5 bg-black/30 border border-gray-600 rounded-md text-xs font-mono text-green-400 truncate"
onClick={(e) => e.currentTarget.select()}
/>
<button
onClick={copyStartCommand}
className="px-2 bg-gray-700 hover:bg-gray-600 border border-gray-600 rounded text-gray-300 transition-colors"
title="Copy command"
>
<CopyIcon className="w-4 h-4" />
</button>
</div>
</div>
{isHttps && (
<div className="bg-red-900/30 border border-red-500/50 p-2 rounded text-xs text-red-200 mt-2">
<strong>Tunneling Required:</strong> This App is on HTTPS. It cannot talk to localhost (HTTP).
<br className="mb-1"/>
Use <strong>Cloudflare Tunnel</strong> or <strong>Ngrok</strong> to get a public HTTPS URL for your local server.
</div>
)}
<div className="mt-3">
<label className="block text-sm font-medium text-gray-300 mb-1">
Local Endpoint {isHttps ? '(Tunnel URL)' : ''}
</label>
<div className="flex gap-2">
<input
type="text"
value={qwenEndpoint}
onChange={(e) => setQwenEndpoint(e.target.value)}
placeholder={isHttps ? "https://....trycloudflare.com/v1" : "http://localhost:8000/v1"}
className="w-full p-2 bg-gray-700 border border-gray-600 rounded-md focus:ring-2 focus:ring-indigo-500 focus:border-indigo-500 text-sm"
/>
{!isHttps && (
<button
onClick={() => setQwenEndpoint('http://localhost:8000/v1')}
className="px-2 bg-gray-700 hover:bg-gray-600 border border-gray-600 rounded text-xs"
title="Reset to localhost"
>
Reset
</button>
)}
</div>
</div>
<div className="pt-1 border-t border-gray-700 mt-2">
<label className="block text-sm font-medium text-gray-300 mb-1">
Video Frame Sampling
</label>
<div className="flex items-center gap-3">
<input
type="range"
min="1"
max="32"
step="1"
value={qwenVideoFrameCount}
onChange={(e) => setQwenVideoFrameCount(parseInt(e.target.value))}
className="flex-grow h-2 bg-gray-700 rounded-lg appearance-none cursor-pointer accent-indigo-500"
/>
<span className="text-sm text-gray-400 w-12 text-right">{qwenVideoFrameCount} f</span>
</div>
<p className="text-xs text-gray-500 mt-1">Number of frames to extract and send to Qwen for video files.</p>
</div>
</div>
</div>
)}
<div>
<label htmlFor="trigger-word" className="block text-sm font-medium text-gray-300 mb-1">Trigger Word</label>
<input
id="trigger-word"
type="text"
value={triggerWord}
onChange={e => setTriggerWord(e.target.value)}
className="w-full p-2 bg-gray-700 border border-gray-600 rounded-md focus:ring-2 focus:ring-indigo-500 focus:border-indigo-500"
placeholder="e.g., GurrenLagannStyle"
/>
</div>
<div>
<label htmlFor="dataset-prefix" className="block text-sm font-medium text-gray-300 mb-1">Dataset File Prefix</label>
<input
id="dataset-prefix"
type="text"
value={datasetPrefix}
onChange={e => setDatasetPrefix(e.target.value)}
className="w-full p-2 bg-gray-700 border border-gray-600 rounded-md focus:ring-2 focus:ring-indigo-500 focus:border-indigo-500"
placeholder="e.g., my_dataset"
/>
</div>
<div>
<label className="block text-sm font-medium text-gray-300 mb-1">Character Tagging</label>
<div className="flex items-center space-x-3">
<input
type="checkbox"
id="character-tagging-toggle"
checked={isCharacterTaggingEnabled}
onChange={(e) => setIsCharacterTaggingEnabled(e.target.checked)}
className="h-4 w-4 rounded border-gray-500 bg-gray-700 text-indigo-500 focus:ring-indigo-600 flex-shrink-0"
title="Enable character tagging"
/>
<input
id="character-show-name"
type="text"
value={characterShowName}
onChange={e => setCharacterShowName(e.target.value)}
className="w-full p-2 bg-gray-700 border border-gray-600 rounded-md focus:ring-2 focus:ring-indigo-500 focus:border-indigo-500 disabled:bg-gray-800 disabled:text-gray-500 disabled:cursor-not-allowed transition-colors"
placeholder="Enter show/series name..."
disabled={!isCharacterTaggingEnabled}
/>
</div>
</div>
</div>
<div className="space-y-4">
<div>
<label htmlFor="bulk-generation-instructions" className="block text-sm font-medium text-gray-300 mb-1">Bulk Generation Instructions</label>
<textarea
id="bulk-generation-instructions"
value={bulkGenerationInstructions}
onChange={(e) => setBulkGenerationInstructions(e.target.value)}
className="w-full p-2 bg-gray-700 border border-gray-600 rounded-md focus:ring-2 focus:ring-indigo-500 focus:border-indigo-500 min-h-[120px] text-sm"
placeholder="For 'Generate All'/'Selected' button..."
rows={4}
/>
</div>
<div>
<label htmlFor="bulk-instructions" className="block text-sm font-medium text-gray-300 mb-1">Bulk Refinement Instructions</label>
<div className="flex gap-2">
<input
id="bulk-instructions"
type="text"
value={bulkInstructions}
onChange={(e) => setBulkInstructions(e.target.value)}
className="w-full p-2 bg-gray-700 border border-gray-600 rounded-md focus:ring-2 focus:ring-indigo-500 focus:border-indigo-500"
placeholder="For 'Refine' button, e.g., Focus on..."
/>
<button
onClick={handleRefineSelected}
disabled={!hasValidConfig || selectedFiles.length === 0 || !bulkInstructions.trim()}
className="flex-shrink-0 flex items-center justify-center px-4 py-2 bg-purple-600 text-white rounded-md hover:bg-purple-700 disabled:bg-gray-500 disabled:cursor-not-allowed transition-colors"
title={!hasValidConfig ? "Please check provider settings" : "Apply instructions to all selected items"}
>
<WandIcon className="w-5 h-5 mr-2" />
<span>Refine ({selectedFiles.length})</span>
</button>
</div>
</div>
<div className="flex items-center space-x-3 pt-2">
<input
type="checkbox"
id="autofit-toggle"
checked={autofitTextareas}
onChange={(e) => setAutofitTextareas(e.target.checked)}
className="h-4 w-4 rounded border-gray-500 bg-gray-700 text-indigo-500 focus:ring-indigo-600"
/>
<label htmlFor="autofit-toggle" className="text-sm font-medium text-gray-300">
Autofit caption textboxes
</label>
</div>
<div className="pt-2">
<div className="flex items-center space-x-3 cursor-pointer">
<input
type="checkbox"
id="queue-toggle"
checked={isQueueEnabled}
onChange={(e) => setIsQueueEnabled(e.target.checked)}
className="h-4 w-4 rounded border-gray-500 bg-gray-700 text-indigo-500 focus:ring-indigo-600"
/>
<label htmlFor="queue-toggle" className="text-sm font-medium text-gray-300">
Enable Request Queue
</label>
</div>
{isQueueEnabled && (
<div className="pl-7 mt-2 space-y-2">
<div className="flex items-center space-x-2">
<label htmlFor="rpm-limit" className="text-sm font-medium text-gray-300">RPM Limit:</label>
<input
type="number"
id="rpm-limit"
value={rpmLimit}
onChange={(e) => setRpmLimit(parseInt(e.target.value, 10) || 1)}
className="w-20 p-1 bg-gray-900 border border-gray-600 rounded-md text-sm focus:ring-1 focus:ring-indigo-500"
min="1"
/>
<label htmlFor="batch-size" className="text-sm font-medium text-gray-300">Batch Size:</label>
<input
type="number"
id="batch-size"
value={batchSize}
onChange={(e) => setBatchSize(Math.max(1, parseInt(e.target.value, 10) || 1))}
className="w-20 p-1 bg-gray-900 border border-gray-600 rounded-md text-sm focus:ring-1 focus:ring-indigo-500"
min="1"
/>
</div>
{(requestQueue.length > 0 || isProcessingQueueItem) ? (
<div className="flex items-center gap-3 w-full">
<div className="flex-grow">
<div className="flex justify-between mb-1">
<span className="font-medium text-pink-300 flex items-center">
<LoaderIcon className="w-4 h-4 mr-2 animate-spin" />
Processing Queue...
</span>
<span className="text-gray-400 font-mono">
{completedQueueCount} / {totalQueueItems}
</span>
</div>
<div className="w-full bg-gray-900 rounded-full h-2.5 relative overflow-hidden">
<div
className="bg-gradient-to-r from-pink-400 to-purple-500 h-2.5 rounded-full transition-all duration-300 ease-out"
style={{ width: `${queueProgress}%` }}
></div>
</div>
</div>
<button
onClick={handleClearQueue}
disabled={requestQueue.length === 0}
className="text-gray-400 hover:text-white disabled:text-gray-600 disabled:cursor-not-allowed p-1 rounded-full hover:bg-gray-700 transition-colors flex-shrink-0"
title="Clear queue"
>
<TrashIcon className="w-4 h-4" />
</button>
</div>
) : (
<span className="text-xs text-gray-400">Queue is idle.</span>
)}
</div>
)}
</div>
</div>
</div>
<div className="border-t border-gray-700 pt-4 flex flex-col sm:flex-row flex-wrap items-center justify-end gap-3">
<button
onClick={handleGenerateAll}
disabled={!hasValidConfig || mediaFiles.length === 0}
className="w-full sm:w-auto flex items-center justify-center px-4 py-2 bg-indigo-600 text-white rounded-md hover:bg-indigo-700 disabled:bg-gray-500 disabled:cursor-not-allowed transition-colors"
title={!hasValidConfig ? "Please check provider settings" : "Generate captions for all unprocessed items"}
>
<SparklesIcon className="w-5 h-5 mr-2" />
Generate All
</button>
<button
onClick={handleGenerateSelected}
disabled={!hasValidConfig || selectedFiles.length === 0}
className="w-full sm:w-auto flex items-center justify-center px-4 py-2 bg-blue-600 text-white rounded-md hover:bg-blue-700 disabled:bg-gray-500 disabled:cursor-not-allowed transition-colors"
title={!hasValidConfig ? "Please check provider settings" : "Generate captions for selected items"}
>
<SparklesIcon className="w-5 h-5 mr-2" />
<span>Generate Selected ({selectedFiles.length})</span>
</button>
<button
onClick={handleCheckQuality}
disabled={!hasValidConfig || selectedFiles.length === 0}
className="w-full sm:w-auto flex items-center justify-center px-4 py-2 bg-teal-600 text-white rounded-md hover:bg-teal-700 disabled:bg-gray-500 disabled:cursor-not-allowed transition-colors"
title={!hasValidConfig ? "Please check provider settings" : "Check caption quality for selected items"}
>
<CheckCircleIcon className="w-5 h-5 mr-2" />
<span>Check Quality ({selectedFiles.length})</span>
</button>
<button
onClick={handleDownload}
disabled={selectedFiles.length === 0}
className="w-full sm:w-auto flex items-center justify-center px-4 py-2 bg-green-600 text-white rounded-md hover:bg-green-700 disabled:bg-gray-500 disabled:cursor-not-allowed transition-colors"
>
<DownloadIcon className="w-5 h-5 mr-2" />
Download Selected ({selectedFiles.length})
</button>
<button
onClick={handleDeleteSelected}
disabled={selectedFiles.length === 0}
className="w-full sm:w-auto flex items-center justify-center px-4 py-2 bg-red-600 text-white rounded-md hover:bg-red-700 disabled:bg-gray-500 disabled:cursor-not-allowed transition-colors"
>
<TrashIcon className="w-5 h-5 mr-2" />
Delete Selected ({selectedFiles.length})
</button>
</div>
</section>
<section className="bg-gray-800/50 p-6 rounded-lg shadow-lg">
<h2 className="text-xl font-semibold mb-4">2. Upload Media</h2>
<FileUploader onFilesAdded={handleFilesAdded} />
</section>
{mediaFiles.length > 0 && (
<section>
<div className="flex justify-between items-center mb-4">
<h2 className="text-xl font-semibold">3. Caption & Refine</h2>
<div className="flex items-center space-x-2">
<input
ref={selectAllCheckboxRef}
type="checkbox"
id="select-all"
className="h-4 w-4 rounded border-gray-500 bg-gray-700 text-indigo-500 focus:ring-indigo-600"
checked={allSelected}
onChange={handleSelectAll}
/>
<label htmlFor="select-all" className="text-sm font-medium text-gray-300 cursor-pointer">
Select All
</label>
</div>
</div>
<div className="grid grid-cols-1 md:grid-cols-2 lg:grid-cols-3 gap-6">
{mediaFiles.map(item => (
<MediaItem
key={item.id}
item={item}
autofit={autofitTextareas}
isApiKeySet={hasValidConfig}
onGenerate={handleGenerateCaption}
onCaptionChange={handleCaptionChange}
onCustomInstructionsChange={handleCustomInstructionsChange}
onSelectionChange={handleSelectionChange}
/>
))}
</div>
</section>
)}
</main>
</div>
);
};
// Make the App component defined above this module's default export.
export default App;