/**
 * VLA backend server.
 *
 * Serves the built frontend from ../dist and exposes two JSON endpoints that
 * forward sampled video frames to Google Generative AI:
 *   POST /api/generate-goal  -> { goal: string }
 *   POST /api/generate-tasks -> { tasks: [...] }
 * plus GET /health for liveness checks.
 */
const express = require('express');
const cors = require('cors');
const multer = require('multer');
const path = require('path');
const { GoogleGenerativeAI } = require('@google/generative-ai');

const app = express();
const PORT = process.env.PORT || 7860; // Use HF Spaces port

// Get API key from environment variables
const API_KEY = process.env.GEMINI_API_KEY || process.env.API_KEY;

if (!API_KEY) {
  console.error('Error: GEMINI_API_KEY or API_KEY environment variable is required');
  process.exit(1);
}

console.log('Initializing Google Generative AI...');
const genAI = new GoogleGenerativeAI(API_KEY);

// Model used by both endpoints; GEMINI_MODEL optionally overrides the default.
const MODEL_NAME = process.env.GEMINI_MODEL || 'gemini-1.5-flash';

// MIME type assumed for frames that do not declare a usable image type.
const DEFAULT_FRAME_MIME_TYPE = 'image/jpeg';

// Middleware
app.use(cors());
app.use(express.json({ limit: '50mb' }));

// Serve static files from frontend build
app.use(express.static(path.join(__dirname, '../dist')));

// Configure multer for file uploads.
// NOTE: no route in this file references `upload`; it is kept so the
// module-level configuration stays unchanged.
const upload = multer({
  storage: multer.memoryStorage(),
  limits: {
    fileSize: 50 * 1024 * 1024, // 50MB limit
  },
});

/**
 * Checks the contents of an already-confirmed frames array.
 *
 * @param {Array} frames - Value of `req.body.frames` (must already be an array).
 * @returns {string|null} A client-facing error message, or null when valid.
 */
function findFramesError(frames) {
  if (frames.length === 0) {
    return 'Frames array must not be empty';
  }
  const allUsable = frames.every(
    (frame) => typeof frame === 'string' && frame.length > 0
  );
  if (!allUsable) {
    return 'Each frame must be a base64 string or data URL';
  }
  return null;
}

/**
 * Converts one frame string into an inline image part for generateContent.
 *
 * Accepts either a data URL ("data:image/png;base64,....") or a bare base64
 * payload. For a data URL the declared image type is used; anything else
 * falls back to JPEG.
 *
 * @param {string} frame - Data URL or bare base64 string.
 * @returns {{inlineData: {data: string, mimeType: string}}} Inline image part.
 */
function toInlineImagePart(frame) {
  const commaIndex = frame.indexOf(',');

  // No comma means there is no data URL prefix to strip: the string itself
  // is the payload (previously this case produced `undefined`).
  if (commaIndex === -1) {
    return { inlineData: { data: frame, mimeType: DEFAULT_FRAME_MIME_TYPE } };
  }

  // Same extraction as `split(',')[1]`: the segment after the first comma.
  const data = frame.split(',')[1];

  let mimeType = DEFAULT_FRAME_MIME_TYPE;
  if (frame.startsWith('data:')) {
    // Header looks like "image/png;base64"; the media type is the first field.
    const declaredType = frame.slice(5, commaIndex).split(';')[0].trim().toLowerCase();
    if (declaredType === 'image/jpg') {
      // Common non-standard spelling of the JPEG media type.
      mimeType = 'image/jpeg';
    } else if (/^image\/[\w.+-]+$/.test(declaredType)) {
      mimeType = declaredType;
    }
  }

  return { inlineData: { data, mimeType } };
}

/**
 * Sends a prompt plus frames to the configured model and returns its text.
 *
 * @param {string} prompt - Instruction text placed before the images.
 * @param {string[]} frames - Validated frame strings.
 * @returns {Promise<string>} Text of the model response.
 * @throws Propagates any error raised by the Generative AI SDK.
 */
async function generateTextFromFrames(prompt, frames) {
  const model = genAI.getGenerativeModel({ model: MODEL_NAME });
  const result = await model.generateContent([
    { text: prompt },
    ...frames.map((frame) => toInlineImagePart(frame)),
  ]);
  return result.response.text();
}

/**
 * Builds the single-task payload used when the model output is not usable JSON.
 *
 * @param {string} text - Raw model output, used as the task description.
 * @param {number} frameCount - Number of frames submitted with the request.
 * @returns {{tasks: Array<object>}} Fallback response body.
 */
function buildFallbackTasks(text, frameCount) {
  return {
    tasks: [
      {
        task_id: 'task_1',
        description: text,
        start_frame: 0,
        end_frame: frameCount - 1,
        interactions: [],
      },
    ],
  };
}

/**
 * Extracts the JSON body from the model output.
 *
 * Looks for a ```json fenced block first, then for the outermost {...} span.
 * Falls back to a single task wrapping the raw text when nothing parses to
 * an object.
 *
 * @param {string} text - Raw model output.
 * @param {number} frameCount - Number of frames submitted with the request.
 * @returns {object} Parsed JSON object or the fallback payload.
 */
function parseTasksResponse(text, frameCount) {
  const jsonMatch =
    text.match(/```json\s*([\s\S]*?)\s*```/) || text.match(/\{[\s\S]*\}/);

  if (!jsonMatch) {
    return buildFallbackTasks(text, frameCount);
  }

  try {
    const parsedData = JSON.parse(jsonMatch[1] || jsonMatch[0]);
    // A fenced block can legally hold `null` or a scalar; only objects are
    // passed through to the client.
    if (parsedData === null || typeof parsedData !== 'object') {
      return buildFallbackTasks(text, frameCount);
    }
    return parsedData;
  } catch (parseError) {
    console.error('JSON parsing error:', parseError);
    return buildFallbackTasks(text, frameCount);
  }
}

// Health check endpoint
app.get('/health', (req, res) => {
  res.json({ status: 'ok', message: 'VLA Backend Server is running' });
});

// Generate overall goal endpoint
// Body: { frames: string[], videoDuration }. Responds with { goal } or
// { error } (400 for invalid input, 500 when generation fails).
app.post('/api/generate-goal', async (req, res) => {
  try {
    const { frames, videoDuration } = req.body;

    if (!frames || !Array.isArray(frames)) {
      return res.status(400).json({ error: 'Frames array is required' });
    }
    const framesError = findFramesError(frames);
    if (framesError) {
      return res.status(400).json({ error: framesError });
    }

    const prompt = [
      `Analyze these video frames and generate an overall goal for the user's actions.`,
      '',
      `Video duration: ${videoDuration} seconds`,
      `Frames: ${frames.length} total`,
      '',
      'Please provide a concise overall goal description of what the user is trying to accomplish in this video.',
    ].join('\n');

    const text = await generateTextFromFrames(prompt, frames);
    res.json({ goal: text });
  } catch (error) {
    console.error('Error generating goal:', error);
    res.status(500).json({ error: 'Failed to generate goal' });
  }
});

// Generate tasks and interactions endpoint
// Body: { frames: string[], goal, videoDuration, totalFrames }. Responds with
// the JSON produced by the model, or a single fallback task wrapping the raw
// text when that output cannot be parsed.
app.post('/api/generate-tasks', async (req, res) => {
  try {
    const { frames, goal, videoDuration, totalFrames } = req.body;

    if (!frames || !Array.isArray(frames) || !goal) {
      return res.status(400).json({ error: 'Frames array and goal are required' });
    }
    const framesError = findFramesError(frames);
    if (framesError) {
      return res.status(400).json({ error: framesError });
    }

    // Create the prompt for tasks and interactions
    const prompt = [
      `Based on the overall goal: "${goal}"`,
      '',
      `Analyze these ${frames.length} video frames and generate detailed tasks and interactions.`,
      '',
      `Video duration: ${videoDuration} seconds`,
      `Total frames: ${totalFrames}`,
      '',
      'Please provide a JSON response with tasks and interactions following this structure:',
      '{',
      '  "tasks": [',
      '    {',
      '      "task_id": "task_1",',
      '      "description": "Description of the task",',
      '      "start_frame": 0,',
      '      "end_frame": 10,',
      '      "interactions": [',
      '        {',
      '          "interaction_id": "interaction_1",',
      '          "type": "click|scroll|type|drag",',
      '          "description": "What action is being performed",',
      '          "frame_number": 5,',
      '          "coordinates": {"x": 100, "y": 200},',
      '          "target_element": "Description of UI element"',
      '        }',
      '      ]',
      '    }',
      '  ]',
      '}',
    ].join('\n');

    const text = await generateTextFromFrames(prompt, frames);
    res.json(parseTasksResponse(text, frames.length));
  } catch (error) {
    console.error('Error generating tasks:', error);
    res.status(500).json({ error: 'Failed to generate tasks and interactions' });
  }
});

// Serve frontend for all other routes
app.get('*', (req, res) => {
  res.sendFile(path.join(__dirname, '../dist/index.html'));
});

app.listen(PORT, '0.0.0.0', () => {
  console.log(`VLA Data Generator running on port ${PORT}`);
  console.log(`API Key configured: ${API_KEY ? 'Yes' : 'No'}`);
});