import React, { useState, useCallback, useRef } from 'react';
import { VlaData, TaskSegment, Interaction } from './types';
import { generateOverallGoal, generateTasksAndInteractions } from './services/backendService';
import { extractFramesFromVideo } from './utils/videoProcessor';
import { VideoUploader } from './components/VideoUploader';
import { VideoPlayer } from './components/VideoPlayer';
import { ResultsDisplay } from './components/ResultsDisplay';
import { WandSparkles } from './components/Icons';

// Type for the point to be highlighted on the video
type HighlightPoint = { x: number; y: number; isEditing: boolean } | null;

// Type for the coordinate picker callback function
type CoordinatePickerCallback = ((coords: { x: number; y: number }) => void) | null;

/**
 * Root component of the VLA Data Generator.
 *
 * Owns the uploaded video, the generated `VlaData`, loading/error state, and
 * the highlight / coordinate-picker state, and exposes memoized handlers for
 * uploading, generating, downloading, seeking and editing interactions.
 */
export default function App(): React.ReactNode {
  const [videoFile, setVideoFile] = useState<File | null>(null);
  // Object URL created from `videoFile`; revoked when a new file replaces it.
  const [videoSrc, setVideoSrc] = useState<string | null>(null);
  // Duration in seconds as reported by the video element's metadata (0 until loaded).
  const [videoDuration, setVideoDuration] = useState(0);
  const [vlaData, setVlaData] = useState<VlaData | null>(null);
  const [isLoading, setIsLoading] = useState(false);
  const [loadingMessage, setLoadingMessage] = useState('');
  const [error, setError] = useState<string | null>(null);
  const [totalFrames, setTotalFrames] = useState(0);
  const [highlightPoint, setHighlightPoint] = useState<HighlightPoint>(null);
  const [coordinatePicker, setCoordinatePicker] = useState<CoordinatePickerCallback>(null);
  // NOTE(review): only ever reset to `false` in the code visible here — confirm where it is set to `true`.
  const [usedFallback, setUsedFallback] = useState(false);
  const videoRef = useRef<HTMLVideoElement>(null);
  // Ref (not state) so a second click is rejected synchronously, before a re-render happens.
  const isGeneratingRef = useRef(false);

  /**
   * Accepts a newly selected file, resets all per-video state and probes the
   * file's duration with a detached `<video>` element.
   *
   * @param file - File chosen by the user; rejected unless its MIME type starts with `video/`.
   */
  const handleVideoUpload = useCallback((file: File) => {
    if (!file.type.startsWith('video/')) {
      setError('Please upload a valid video file.');
      return;
    }

    setVideoFile(file);
    // Release the object URL of the previously loaded video.
    if (videoSrc) {
      URL.revokeObjectURL(videoSrc);
    }
    const url = URL.createObjectURL(file);
    setVideoSrc(url);
    setVlaData(null);
    setError(null);
    setUsedFallback(false);
    setVideoDuration(0);
    setTotalFrames(0);

    const videoElement = document.createElement('video');
    videoElement.preload = 'metadata';
    // Attach handlers before assigning `src` so no event can be missed.
    videoElement.onloadedmetadata = () => {
      setVideoDuration(videoElement.duration);
    };
    videoElement.onerror = () => {
      setError("Could not read video metadata to get duration.");
    };
    videoElement.src = url;
  }, [videoSrc]);

  /**
   * Runs the three-step pipeline (frame extraction, overall goal, tasks and
   * interactions) for the current video and stores the result in `vlaData`.
   * Any failure is reported through `error` and clears partial results.
   */
  const handleGenerate = useCallback(async () => {
    if (!videoFile || isGeneratingRef.current) {
      if (!videoFile) setError('No video file selected.');
      return;
    }

    isGeneratingRef.current = true;
    setIsLoading(true);
    setError(null);
    setVlaData(null);
    setUsedFallback(false); // Reset on each generation

    try {
      const FRAMES_PER_SECOND = 2; // Extract 2 frames per second
      const MAX_FRAMES_TOTAL = 360; // Cap at 360 frames (e.g., 3 minutes at 2fps) to manage memory/performance

      // Math.ceil already yields at least one frame for any positive duration.
      const calculatedFrames = Math.min(
        Math.ceil(videoDuration * FRAMES_PER_SECOND),
        MAX_FRAMES_TOTAL,
      );
      setTotalFrames(calculatedFrames);

      // Step 1: Extract frames
      setLoadingMessage(`Step 1/3: Extracting ${calculatedFrames} frames from video...`);
      const frames = await extractFramesFromVideo(videoFile, calculatedFrames);
      if (frames.length === 0) {
        throw new Error("Could not extract any frames from the video. The file might be corrupted or in an unsupported format.");
      }

      // Step 2: Generate Overall Goal
      setLoadingMessage('Step 2/3: Determining overall goal...');
      // First, middle and last frame are used as keyframes for the goal.
      const keyframes = [frames[0], frames[Math.floor(frames.length / 2)], frames[frames.length - 1]];
      const overallGoal = await generateOverallGoal(keyframes, videoDuration);

      // Publish the goal immediately so it is available while tasks are still being generated.
      const initialVlaData: VlaData = { overallGoal, tasks: [] };
      setVlaData(initialVlaData);

      // Step 3: Generate Task Segments and Interactions in one go
      setLoadingMessage('Step 3/3: Analyzing tasks and interactions...');
      const completedVlaData = await generateTasksAndInteractions(
        frames,
        overallGoal,
        videoDuration,
        // Use the value computed in this run: the `totalFrames` state captured
        // by this callback still holds the value from the previous render.
        calculatedFrames,
        (current, total) => {
          // Progress callback - you could update loading message here
          console.log(`Progress: ${current}/${total}`);
        }
      );
      setVlaData(completedVlaData);
    } catch (err) {
      console.error(err);
      const errorMessage = err instanceof Error ? err.message : 'An unknown error occurred.';
      setError(`Failed to process video. ${errorMessage}`);
      setVlaData(null); // Clear partial data on major failure
    } finally {
      setIsLoading(false);
      setLoadingMessage('');
      isGeneratingRef.current = false;
    }
  }, [videoFile, videoDuration]);

  /**
   * Downloads the current `vlaData` as pretty-printed JSON named
   * `<video base name>_vla_data.json`.
   */
  const handleDownload = useCallback(() => {
    if (!vlaData || !videoFile) return;

    const dataStr = JSON.stringify(vlaData, null, 2);
    const dataBlob = new Blob([dataStr], { type: 'application/json' });
    const dataUrl = URL.createObjectURL(dataBlob);

    const link = document.createElement('a');
    link.href = dataUrl;
    // Strip the extension; fall back to the full name when there is none.
    const baseName = videoFile.name.substring(0, videoFile.name.lastIndexOf('.')) || videoFile.name;
    link.download = `${baseName}_vla_data.json`;

    document.body.appendChild(link);
    link.click();
    document.body.removeChild(link);
    URL.revokeObjectURL(dataUrl);
  }, [vlaData, videoFile]);

  /**
   * Moves the player referenced by `videoRef` to the given position.
   *
   * @param time - Target position, assigned to the element's `currentTime`.
   */
  const handleSeekToTime = useCallback((time: number) => {
    if (videoRef.current) {
      videoRef.current.currentTime = time;
    }
  }, []);

  /**
   * Immutably replaces one interaction of one task in `vlaData`.
   *
   * @param taskId - `id` of the task that owns the interaction.
   * @param interactionIndex - Index of the interaction within that task.
   * @param updatedInteraction - Replacement value.
   */
  const handleUpdateInteraction = useCallback((taskId: number, interactionIndex: number, updatedInteraction: Interaction) => {
    setVlaData(currentData => {
      if (!currentData) return null;

      const newTasks = currentData.tasks.map(task => {
        if (task.id === taskId) {
          const newInteractions = [...task.interactions];
          newInteractions[interactionIndex] = updatedInteraction;
          return { ...task, interactions: newInteractions };
        }
        return task;
      });

      return { ...currentData, tasks: newTasks };
    });
  }, []);

  /** Forwards the clicked coordinates to the active coordinate picker, if any. */
  const handleVideoClick = useCallback((coords: { x: number; y: number }) => {
    if (coordinatePicker) {
      coordinatePicker(coords);
    }
  }, [coordinatePicker]);

  /**
   * Updates the highlighted point, unless that would let a non-editing
   * (hover) highlight replace a sticky editing highlight.
   */
  const handleHighlightPoint = useCallback((point: HighlightPoint) => {
    // Functional update: the decision is made against the latest state
    // rather than a value captured when the callback was created.
    setHighlightPoint(current => {
      // Prevent hover from overriding a sticky editing highlight
      if (current?.isEditing && !point?.isEditing) {
        return current;
      }
      return point;
    });
  }, []);

  /** Installs (or clears, with `null`) the callback that receives the next video click. */
  const handleSetCoordinatePicker = useCallback((callback: CoordinatePickerCallback) => {
    // Wrap in an arrow so React stores the function instead of invoking it as an updater.
    setCoordinatePicker(() => callback);
  }, []);

  // NOTE(review): the JSX element tags appear to have been stripped from this
  // copy of the file; the remaining text and expressions below are preserved
  // verbatim. Restore the original markup before building.
  return (
    {/* Left Column: Video and Controls */}

    VLA Data Generator

    Upload a screen recording to automatically generate structured data about user actions.

    {videoSrc ? ( ) : ( )}
    {videoFile && ( )}
    {/* Right Column: Results */}
  );
}