// Screen-VLA backend/server.js (VLA Data Generator backend)
const express = require('express');
const cors = require('cors');
const multer = require('multer');
const path = require('path');
const { GoogleGenerativeAI } = require('@google/generative-ai');
const app = express();
const PORT = process.env.PORT || 7860; // Use HF Spaces port
// Get API key from environment variables
const API_KEY = process.env.GEMINI_API_KEY || process.env.API_KEY;
if (!API_KEY) {
  console.error('Error: GEMINI_API_KEY or API_KEY environment variable is required');
  process.exit(1);
}
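// Example (local dev): GEMINI_API_KEY=your-key node server.js
// On Hugging Face Spaces, set GEMINI_API_KEY as a Space secret so it is exposed
// to the app as an environment variable.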
console.log('Initializing Google Generative AI...');
const genAI = new GoogleGenerativeAI(API_KEY);
// Middleware
app.use(cors());
app.use(express.json({ limit: '50mb' }));
// Serve static files from frontend build
app.use(express.static(path.join(__dirname, '../dist')));
// Configure multer for file uploads
const upload = multer({
  storage: multer.memoryStorage(),
  limits: {
    fileSize: 50 * 1024 * 1024, // 50MB limit
  }
});
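// Note: `upload` is configured for in-memory multipart uploads but is not attached
// to any route in this file; the current endpoints accept base64 frames in the JSON body.
// It is presumably reserved for future file-upload endpoints.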
// Health check endpoint
app.get('/health', (req, res) => {
  res.json({ status: 'ok', message: 'VLA Backend Server is running' });
});
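// Quick check (assuming the default port 7860):
//   curl http://localhost:7860/health
//   -> { "status": "ok", "message": "VLA Backend Server is running" }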
// Generate overall goal endpoint
app.post('/api/generate-goal', async (req, res) => {
  try {
    const { frames, videoDuration } = req.body;
    if (!frames || !Array.isArray(frames)) {
      return res.status(400).json({ error: 'Frames array is required' });
    }
    const model = genAI.getGenerativeModel({
      model: "gemini-1.5-flash"
    });
    // Build the prompt for the overall-goal analysis
    const prompt = `Analyze these video frames and generate an overall goal for the user's actions.
Video duration: ${videoDuration} seconds
Frames: ${frames.length} total
Please provide a concise overall goal description of what the user is trying to accomplish in this video.`;
    const result = await model.generateContent([
      { text: prompt },
      ...frames.map(frame => ({
        inlineData: {
          data: frame.split(',')[1], // Remove the data URL prefix
          mimeType: 'image/jpeg'
        }
      }))
    ]);
    const response = result.response;
    const text = response.text();
    res.json({ goal: text });
  } catch (error) {
    console.error('Error generating goal:', error);
    res.status(500).json({ error: 'Failed to generate goal' });
  }
});
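// Example request (illustrative values; frames are expected to be JPEG data URLs,
// since the handler strips everything before the first comma):
//   POST /api/generate-goal
//   { "frames": ["data:image/jpeg;base64,..."], "videoDuration": 12.5 }
// Response: { "goal": "<model-generated description>" }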
// Generate tasks and interactions endpoint
app.post('/api/generate-tasks', async (req, res) => {
  try {
    const { frames, goal, videoDuration, totalFrames } = req.body;
    if (!frames || !Array.isArray(frames) || !goal) {
      return res.status(400).json({ error: 'Frames array and goal are required' });
    }
    const model = genAI.getGenerativeModel({
      model: "gemini-1.5-flash"
    });
    // Build the prompt for tasks and interactions
    const prompt = `Based on the overall goal: "${goal}"
Analyze these ${frames.length} video frames and generate detailed tasks and interactions.
Video duration: ${videoDuration} seconds
Total frames: ${totalFrames}
Please provide a JSON response with tasks and interactions following this structure:
{
  "tasks": [
    {
      "task_id": "task_1",
      "description": "Description of the task",
      "start_frame": 0,
      "end_frame": 10,
      "interactions": [
        {
          "interaction_id": "interaction_1",
          "type": "click|scroll|type|drag",
          "description": "What action is being performed",
          "frame_number": 5,
          "coordinates": {"x": 100, "y": 200},
          "target_element": "Description of UI element"
        }
      ]
    }
  ]
}`;
    const result = await model.generateContent([
      { text: prompt },
      ...frames.map(frame => ({
        inlineData: {
          data: frame.split(',')[1],
          mimeType: 'image/jpeg'
        }
      }))
    ]);
    const response = result.response;
    const text = response.text();
    // Try to parse the model output as JSON (either inside a ```json fence or as a bare object)
    try {
      const jsonMatch = text.match(/```json\s*([\s\S]*?)\s*```/) || text.match(/\{[\s\S]*\}/);
      if (jsonMatch) {
        const jsonStr = jsonMatch[1] || jsonMatch[0];
        const parsedData = JSON.parse(jsonStr);
        res.json(parsedData);
      } else {
        // Fallback if no JSON block was found: wrap the raw text in a single task
        res.json({
          tasks: [{
            task_id: "task_1",
            description: text,
            start_frame: 0,
            end_frame: frames.length - 1,
            interactions: []
          }]
        });
      }
    } catch (parseError) {
      console.error('JSON parsing error:', parseError);
      res.json({
        tasks: [{
          task_id: "task_1",
          description: text,
          start_frame: 0,
          end_frame: frames.length - 1,
          interactions: []
        }]
      });
    }
  } catch (error) {
    console.error('Error generating tasks:', error);
    res.status(500).json({ error: 'Failed to generate tasks and interactions' });
  }
});
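// Example request (illustrative values):
//   POST /api/generate-tasks
//   { "frames": ["data:image/jpeg;base64,..."], "goal": "Fill out the signup form",
//     "videoDuration": 12.5, "totalFrames": 30 }
// On success the response follows the task/interaction schema requested in the prompt
// above; otherwise it falls back to a single task wrapping the raw model text.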
// Serve frontend for all other routes
app.get('*', (req, res) => {
  res.sendFile(path.join(__dirname, '../dist/index.html'));
});
app.listen(PORT, '0.0.0.0', () => {
  console.log(`VLA Data Generator running on port ${PORT}`);
  console.log(`API Key configured: ${API_KEY ? 'Yes' : 'No'}`);
});