// Hugging Face Spaces status: Running
const express = require('express'); | |
const cors = require('cors'); | |
const multer = require('multer'); | |
const path = require('path'); | |
const { GoogleGenerativeAI } = require('@google/generative-ai'); | |
// Express application that every route and middleware below is attached to.
const app = express();

// Listen on the port supplied by the environment, otherwise on 7860
// (the port Hugging Face Spaces uses).
const PORT = process.env.PORT || 7860;

/**
 * Looks up the Gemini API key in an environment-like object.
 * GEMINI_API_KEY takes precedence; API_KEY is the fallback name.
 *
 * @param {Object} env - Object holding environment variables.
 * @returns {string|undefined} The first non-empty key found, if any.
 */
function readApiKey(env) {
  return env.GEMINI_API_KEY || env.API_KEY;
}

const API_KEY = readApiKey(process.env);

// The server cannot do anything useful without a key, so stop immediately.
if (!API_KEY) {
  console.error('Error: GEMINI_API_KEY or API_KEY environment variable is required');
  process.exit(1);
}

console.log('Initializing Google Generative AI...');

// Shared Gemini client used by the generation endpoints.
const genAI = new GoogleGenerativeAI(API_KEY);
// Middleware, registered in this order: CORS, JSON body parsing, static assets.
app.use(cors());

// JSON bodies of up to 50mb are accepted.
const jsonBodyParser = express.json({ limit: '50mb' });
app.use(jsonBodyParser);

// Serve the built frontend from the sibling "dist" directory.
const staticAssetsDir = path.join(__dirname, '../dist');
app.use(express.static(staticAssetsDir));

// Largest upload multer will accept: 50MB, expressed in bytes.
const UPLOAD_MAX_BYTES = 50 * 1024 * 1024;

// Multer instance for file uploads; files are kept in memory.
const upload = multer({
  storage: multer.memoryStorage(),
  limits: { fileSize: UPLOAD_MAX_BYTES },
});
/**
 * Health check handler: replies with a fixed JSON status payload.
 *
 * @param {Object} _req - Incoming request (unused).
 * @param {Object} res - Response used to send the JSON payload.
 */
function reportHealth(_req, res) {
  const payload = { status: 'ok', message: 'VLA Backend Server is running' };
  res.json(payload);
}

// Health check endpoint
app.get('/health', reportHealth);
// ---------------------------------------------------------------------------
// Gemini generation endpoints and the helpers they share
// ---------------------------------------------------------------------------

// Model used by both generation endpoints. Defaults to the previously
// hard-coded model; set GEMINI_MODEL to switch models without a code change.
const GEMINI_MODEL_NAME = process.env.GEMINI_MODEL || 'gemini-1.5-flash';

// MIME type assumed for frames that do not declare one in a data-URL header.
const DEFAULT_FRAME_MIME_TYPE = 'image/jpeg';

// Sent with a 400 when the frames array is empty or holds an unusable entry.
const INVALID_FRAMES_MESSAGE =
  'Frames must be a non-empty array of base64 strings or data URLs';

/**
 * Converts one client-supplied frame into the inline-image part passed to
 * model.generateContent.
 *
 * Accepts either a data URL ("data:image/png;base64,AAAA") or a bare base64
 * string. For a data URL, everything after the first comma is the payload and
 * the MIME type is read from the header; otherwise the whole string is the
 * payload and the MIME type defaults to JPEG.
 *
 * @param {*} frame - One entry of the request's frames array.
 * @returns {{inlineData: {data: string, mimeType: string}}|null} The part, or
 *   null when the frame is not a string or carries no payload.
 */
function toInlineImagePart(frame) {
  if (typeof frame !== 'string') {
    return null;
  }

  const commaIndex = frame.indexOf(',');
  const header = commaIndex === -1 ? '' : frame.slice(0, commaIndex);
  const data = commaIndex === -1 ? frame : frame.slice(commaIndex + 1);
  if (data.length === 0) {
    return null;
  }

  // Only the short header is scanned, never the (potentially large) payload.
  const mimeMatch = /^data:([^;,]+)/.exec(header);
  const mimeType = mimeMatch ? mimeMatch[1] : DEFAULT_FRAME_MIME_TYPE;

  return { inlineData: { data, mimeType } };
}

/**
 * Converts every frame of a request into an inline-image part.
 *
 * @param {Array} frames - Frames array taken from the request body.
 * @returns {Array<Object>|null} One part per frame, or null when the array is
 *   empty or any entry cannot be converted.
 */
function buildImageParts(frames) {
  if (frames.length === 0) {
    return null;
  }
  const parts = frames.map(toInlineImagePart);
  return parts.every((part) => part !== null) ? parts : null;
}

/**
 * Pulls a JSON value out of free-form model output.
 *
 * Looks first for a ```json fenced block, then for the widest {...} span.
 * Parse failures are logged and reported as "nothing found".
 *
 * @param {string} text - Raw text returned by the model.
 * @returns {*} The parsed value, or null when nothing parseable was found.
 */
function extractJsonPayload(text) {
  const match = text.match(/```json\s*([\s\S]*?)\s*```/) || text.match(/\{[\s\S]*\}/);
  if (!match) {
    return null;
  }
  try {
    return JSON.parse(match[1] || match[0]);
  } catch (parseError) {
    console.error('JSON parsing error:', parseError);
    return null;
  }
}

/**
 * Builds the single-task payload returned when the model output cannot be
 * used as structured JSON: the raw text becomes the description of one task
 * spanning all submitted frames.
 *
 * @param {string} text - Raw text returned by the model.
 * @param {number} frameCount - Number of frames sent with the request.
 * @returns {{tasks: Array<Object>}} Payload in the same shape as a normal reply.
 */
function buildFallbackTasks(text, frameCount) {
  return {
    tasks: [{
      task_id: 'task_1',
      description: text,
      start_frame: 0,
      end_frame: frameCount - 1,
      interactions: [],
    }],
  };
}

// Generate overall goal endpoint.
// Body: { frames: string[], videoDuration }. Replies with { goal } on success,
// 400 for unusable input, 500 when the model call fails.
app.post('/api/generate-goal', async (req, res) => {
  try {
    // Guard the destructuring: the body may be absent when no JSON was parsed.
    const { frames, videoDuration } = req.body || {};
    if (!Array.isArray(frames)) {
      return res.status(400).json({ error: 'Frames array is required' });
    }

    const imageParts = buildImageParts(frames);
    if (!imageParts) {
      return res.status(400).json({ error: INVALID_FRAMES_MESSAGE });
    }

    const model = genAI.getGenerativeModel({ model: GEMINI_MODEL_NAME });

    const prompt = `Analyze these video frames and generate an overall goal for the user's actions.
Video duration: ${videoDuration} seconds
Frames: ${frames.length} total
Please provide a concise overall goal description of what the user is trying to accomplish in this video.`;

    const result = await model.generateContent([{ text: prompt }, ...imageParts]);
    const text = result.response.text();
    res.json({ goal: text });
  } catch (error) {
    console.error('Error generating goal:', error);
    res.status(500).json({ error: 'Failed to generate goal' });
  }
});

// Generate tasks and interactions endpoint.
// Body: { frames: string[], goal, videoDuration, totalFrames }. Replies with a
// { tasks: [...] } payload: the model's JSON when it has that shape, otherwise
// a single fallback task wrapping the raw model text.
app.post('/api/generate-tasks', async (req, res) => {
  try {
    // Guard the destructuring: the body may be absent when no JSON was parsed.
    const { frames, goal, videoDuration, totalFrames } = req.body || {};
    if (!Array.isArray(frames) || !goal) {
      return res.status(400).json({ error: 'Frames array and goal are required' });
    }

    const imageParts = buildImageParts(frames);
    if (!imageParts) {
      return res.status(400).json({ error: INVALID_FRAMES_MESSAGE });
    }

    const model = genAI.getGenerativeModel({ model: GEMINI_MODEL_NAME });

    // Prompt for tasks and interactions, including the expected JSON shape.
    const prompt = `Based on the overall goal: "${goal}"
Analyze these ${frames.length} video frames and generate detailed tasks and interactions.
Video duration: ${videoDuration} seconds
Total frames: ${totalFrames}
Please provide a JSON response with tasks and interactions following this structure:
{
"tasks": [
{
"task_id": "task_1",
"description": "Description of the task",
"start_frame": 0,
"end_frame": 10,
"interactions": [
{
"interaction_id": "interaction_1",
"type": "click|scroll|type|drag",
"description": "What action is being performed",
"frame_number": 5,
"coordinates": {"x": 100, "y": 200},
"target_element": "Description of UI element"
}
]
}
]
}`;

    const result = await model.generateContent([{ text: prompt }, ...imageParts]);
    const text = result.response.text();

    // Only trust the model's JSON when it has the documented { tasks: [...] }
    // shape; anything else is wrapped so clients always receive a tasks array.
    const parsed = extractJsonPayload(text);
    const hasTasks =
      parsed !== null && typeof parsed === 'object' && Array.isArray(parsed.tasks);

    res.json(hasTasks ? parsed : buildFallbackTasks(text, frames.length));
  } catch (error) {
    console.error('Error generating tasks:', error);
    res.status(500).json({ error: 'Failed to generate tasks and interactions' });
  }
});
/**
 * Catch-all handler: sends the frontend's index.html for any GET request that
 * no earlier route or static file answered.
 *
 * @param {Object} _req - Incoming request (unused).
 * @param {Object} res - Response used to send the file.
 */
function serveFrontend(_req, res) {
  const indexFile = path.join(__dirname, '../dist/index.html');
  res.sendFile(indexFile);
}

// Serve frontend for all other routes
app.get('*', serveFrontend);
/**
 * Logs the startup banner once the server is accepting connections:
 * the port in use and whether an API key is present.
 */
function announceStartup() {
  const keyStatus = API_KEY ? 'Yes' : 'No';
  console.log(`VLA Data Generator running on port ${PORT}`);
  console.log(`API Key configured: ${keyStatus}`);
}

// Bind to all interfaces (0.0.0.0) on the configured port.
app.listen(PORT, '0.0.0.0', announceStartup);