Spaces:
Sleeping
Sleeping
File size: 5,745 Bytes
256cef9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 |
const express = require('express');
const cors = require('cors');
const multer = require('multer');
const path = require('path');
const { GoogleGenerativeAI } = require('@google/generative-ai');
// Express application instance that every route and middleware below attaches to.
const app = express();

// Port to listen on: taken from the environment when set, otherwise 7860
// (the port Hugging Face Spaces expects).
const PORT = process.env.PORT ? process.env.PORT : 7860;

// The Gemini key may be supplied under either variable name; GEMINI_API_KEY
// takes precedence when both are set.
const API_KEY = process.env.GEMINI_API_KEY
  ? process.env.GEMINI_API_KEY
  : process.env.API_KEY;

// Without a key the Gemini client cannot be used, so abort start-up early.
if (Boolean(API_KEY) === false) {
  console.error('Error: GEMINI_API_KEY or API_KEY environment variable is required');
  process.exit(1);
}

// Shared Gemini client used by the /api/* route handlers.
console.log('Initializing Google Generative AI...');
const genAI = new GoogleGenerativeAI(API_KEY);
// ---- Middleware (registration order is preserved: CORS, JSON parsing, static files) ----

// Enable CORS using the cors package defaults.
app.use(cors());

// Parse JSON request bodies; the limit is raised to 50mb for large payloads.
const jsonBodyParser = express.json({ limit: '50mb' });
app.use(jsonBodyParser);

// Serve static files from the frontend build directory (../dist relative to this file).
const staticAssetsDir = path.join(__dirname, '../dist');
app.use(express.static(staticAssetsDir));

// ---- File uploads ----

// Largest accepted upload: 50MB, expressed in bytes.
const MAX_UPLOAD_BYTES = 50 * 1024 * 1024;

// Multer instance that keeps uploaded files in memory rather than on disk.
const upload = multer({
  storage: multer.memoryStorage(),
  limits: { fileSize: MAX_UPLOAD_BYTES },
});
// Health check endpoint: GET /health always answers with a fixed JSON payload.
app.get('/health', (_req, res) => {
  const healthPayload = {
    status: 'ok',
    message: 'VLA Backend Server is running',
  };
  res.json(healthPayload);
});
// Generate overall goal endpoint
/**
 * POST /api/generate-goal
 *
 * Request body (JSON):
 *   - frames: array of images, each either a data URL
 *     ("data:image/png;base64,....") or a bare base64 string.
 *   - videoDuration: value interpolated into the prompt as the duration in seconds.
 *
 * Responses:
 *   - 200 { goal }   the text returned by the model
 *   - 400 { error }  frames is missing, not an array, or contains an invalid entry
 *   - 500 { error }  any failure while calling the model
 */
app.post('/api/generate-goal', async (req, res) => {
  try {
    // Treat an absent body like an empty one so it produces a 400 instead of a 500.
    const { frames, videoDuration } = req.body || {};
    if (!Array.isArray(frames)) {
      return res.status(400).json({ error: 'Frames array is required' });
    }

    // Convert every frame into an inline image part, rejecting malformed entries
    // up front rather than letting them fail inside the model call.
    const imageParts = [];
    for (const frame of frames) {
      if (typeof frame !== 'string') {
        return res.status(400).json({ error: 'Each frame must be a base64 string or data URL' });
      }
      // Strip the "data:<mime>;base64," prefix when present; a string without a
      // comma is taken to be bare base64 already.
      const commaIndex = frame.indexOf(',');
      const data = commaIndex === -1 ? frame : frame.slice(commaIndex + 1);
      if (data.length === 0) {
        return res.status(400).json({ error: 'Each frame must be a base64 string or data URL' });
      }
      // Use the MIME type declared by the data URL; fall back to JPEG otherwise.
      const mimeMatch = /^data:([^;,]+)[;,]/.exec(frame);
      imageParts.push({
        inlineData: {
          data,
          mimeType: mimeMatch ? mimeMatch[1] : 'image/jpeg',
        },
      });
    }

    const model = genAI.getGenerativeModel({
      model: "gemini-1.5-flash"
    });

    // Create the prompt (you'll need to move the prompt logic here)
    // NOTE: continuation lines stay at column 0 so the prompt text is unchanged.
    const prompt = `Analyze these video frames and generate an overall goal for the user's actions.
Video duration: ${videoDuration} seconds
Frames: ${frames.length} total
Please provide a concise overall goal description of what the user is trying to accomplish in this video.`;

    const result = await model.generateContent([{ text: prompt }, ...imageParts]);
    const text = result.response.text();
    res.json({ goal: text });
  } catch (error) {
    console.error('Error generating goal:', error);
    res.status(500).json({ error: 'Failed to generate goal' });
  }
});
// Generate tasks and interactions endpoint
/**
 * POST /api/generate-tasks
 *
 * Request body (JSON):
 *   - frames: array of images, each either a data URL
 *     ("data:image/png;base64,....") or a bare base64 string.
 *   - goal: overall goal text, interpolated into the prompt.
 *   - videoDuration, totalFrames: values interpolated into the prompt.
 *
 * Responses:
 *   - 200 { tasks: [...] }  the JSON object extracted from the model reply, or a
 *                           single-task fallback wrapping the raw reply text when
 *                           no usable JSON object with a `tasks` array is found
 *   - 400 { error }         frames/goal missing or a frame entry is invalid
 *   - 500 { error }         any failure while calling the model
 */
app.post('/api/generate-tasks', async (req, res) => {
  try {
    // Treat an absent body like an empty one so it produces a 400 instead of a 500.
    const { frames, goal, videoDuration, totalFrames } = req.body || {};
    if (!Array.isArray(frames) || !goal) {
      return res.status(400).json({ error: 'Frames array and goal are required' });
    }

    // Convert every frame into an inline image part, rejecting malformed entries
    // up front rather than letting them fail inside the model call.
    const imageParts = [];
    for (const frame of frames) {
      if (typeof frame !== 'string') {
        return res.status(400).json({ error: 'Each frame must be a base64 string or data URL' });
      }
      // Strip the "data:<mime>;base64," prefix when present; a string without a
      // comma is taken to be bare base64 already.
      const commaIndex = frame.indexOf(',');
      const data = commaIndex === -1 ? frame : frame.slice(commaIndex + 1);
      if (data.length === 0) {
        return res.status(400).json({ error: 'Each frame must be a base64 string or data URL' });
      }
      // Use the MIME type declared by the data URL; fall back to JPEG otherwise.
      const mimeMatch = /^data:([^;,]+)[;,]/.exec(frame);
      imageParts.push({
        inlineData: {
          data,
          mimeType: mimeMatch ? mimeMatch[1] : 'image/jpeg',
        },
      });
    }

    const model = genAI.getGenerativeModel({
      model: "gemini-1.5-flash"
    });

    // Create the prompt for tasks and interactions
    // NOTE: continuation lines stay at column 0 so the prompt text is unchanged.
    const prompt = `Based on the overall goal: "${goal}"
Analyze these ${frames.length} video frames and generate detailed tasks and interactions.
Video duration: ${videoDuration} seconds
Total frames: ${totalFrames}
Please provide a JSON response with tasks and interactions following this structure:
{
"tasks": [
{
"task_id": "task_1",
"description": "Description of the task",
"start_frame": 0,
"end_frame": 10,
"interactions": [
{
"interaction_id": "interaction_1",
"type": "click|scroll|type|drag",
"description": "What action is being performed",
"frame_number": 5,
"coordinates": {"x": 100, "y": 200},
"target_element": "Description of UI element"
}
]
}
]
}`;

    const result = await model.generateContent([{ text: prompt }, ...imageParts]);
    const text = result.response.text();

    // Try to parse JSON response: prefer a ```json fenced block, otherwise take
    // the span from the first "{" to the last "}" in the reply.
    let parsedData = null;
    try {
      const jsonMatch = text.match(/```json\s*([\s\S]*?)\s*```/) || text.match(/\{[\s\S]*\}/);
      if (jsonMatch) {
        parsedData = JSON.parse(jsonMatch[1] || jsonMatch[0]);
      }
    } catch (parseError) {
      console.error('JSON parsing error:', parseError);
    }

    // Only hand the parsed value to the client when it has the documented shape.
    const hasTasks =
      parsedData !== null &&
      typeof parsedData === 'object' &&
      Array.isArray(parsedData.tasks);
    if (hasTasks) {
      return res.json(parsedData);
    }

    // Fallback: wrap the raw model text in a single task spanning the supplied
    // frames. The end index is clamped so an empty frames array yields 0, not -1.
    res.json({
      tasks: [{
        task_id: "task_1",
        description: text,
        start_frame: 0,
        end_frame: Math.max(frames.length - 1, 0),
        interactions: []
      }]
    });
  } catch (error) {
    console.error('Error generating tasks:', error);
    res.status(500).json({ error: 'Failed to generate tasks and interactions' });
  }
});
// Fallback route: any GET not matched by an earlier route or static file
// receives the frontend's index.html.
app.get('*', (_req, res) => {
  const indexHtmlPath = path.join(__dirname, '../dist/index.html');
  res.sendFile(indexHtmlPath);
});
// Start the HTTP server on 0.0.0.0 and log the start-up summary once it is listening.
app.listen(PORT, '0.0.0.0', () => {
  const keyStatus = API_KEY ? 'Yes' : 'No';
  console.log(`VLA Data Generator running on port ${PORT}`);
  console.log(`API Key configured: ${keyStatus}`);
});
|