Spaces:
Running
on
Zero
Running
on
Zero
import logging | |
import base64 | |
import aiohttp | |
import torch | |
from io import BytesIO | |
from typing import Optional | |
from typing_extensions import override | |
from comfy_api.latest import ComfyExtension, io as comfy_io | |
from comfy_api.input_impl.video_types import VideoFromFile | |
from comfy_api_nodes.apis import ( | |
VeoGenVidRequest, | |
VeoGenVidResponse, | |
VeoGenVidPollRequest, | |
VeoGenVidPollResponse, | |
) | |
from comfy_api_nodes.apis.client import ( | |
ApiEndpoint, | |
HttpMethod, | |
SynchronousOperation, | |
PollingOperation, | |
) | |
from comfy_api_nodes.apinode_utils import ( | |
downscale_image_tensor, | |
tensor_to_base64_string, | |
) | |
# Passed as `estimated_duration` to the PollingOperation that waits for a Veo
# generation job to finish. Presumably expressed in seconds — TODO confirm
# against PollingOperation.
AVERAGE_DURATION_VIDEO_GEN = 32
def convert_image_to_base64(image: Optional[torch.Tensor]) -> Optional[str]:
    """Encode an optional image tensor as a base64 string.

    Args:
        image: Image tensor to encode, or ``None`` when no image was supplied.

    Returns:
        ``None`` when ``image`` is ``None``. Otherwise the value returned by
        ``tensor_to_base64_string`` for the image after it has been passed
        through ``downscale_image_tensor`` with ``total_pixels=2048*2048``.
    """
    if image is None:
        return None
    scaled_image = downscale_image_tensor(image, total_pixels=2048 * 2048)
    return tensor_to_base64_string(scaled_image)
def get_video_url_from_response(poll_response: VeoGenVidPollResponse) -> Optional[str]:
    """Return the ``gcsUri`` of the first video in a poll response, if any.

    Args:
        poll_response: Poll result whose ``response.videos`` list is inspected.

    Returns:
        ``str(gcsUri)`` of the first listed video, or ``None`` when the
        response is empty, lists no videos, or the first video has no
        ``gcsUri`` value.
    """
    payload = poll_response.response
    clips = getattr(payload, "videos", None) if payload else None
    if not clips:
        return None

    uri = getattr(clips[0], "gcsUri", None)
    return str(uri) if uri else None
class VeoVideoGenerationNode(comfy_io.ComfyNode):
    """
    Generates videos from text prompts using Google's Veo API.

    This node can create videos from text descriptions and optional image inputs,
    with control over parameters like aspect ratio, duration, and more.
    """

    @classmethod
    def define_schema(cls):
        """Return the schema (inputs, outputs, hidden fields) for the Veo 2 node."""
        return comfy_io.Schema(
            node_id="VeoVideoGenerationNode",
            display_name="Google Veo 2 Video Generation",
            category="api node/video/Veo",
            description="Generates videos from text prompts using Google's Veo 2 API",
            inputs=[
                comfy_io.String.Input(
                    "prompt",
                    multiline=True,
                    default="",
                    tooltip="Text description of the video",
                ),
                comfy_io.Combo.Input(
                    "aspect_ratio",
                    options=["16:9", "9:16"],
                    default="16:9",
                    tooltip="Aspect ratio of the output video",
                ),
                comfy_io.String.Input(
                    "negative_prompt",
                    multiline=True,
                    default="",
                    tooltip="Negative text prompt to guide what to avoid in the video",
                    optional=True,
                ),
                comfy_io.Int.Input(
                    "duration_seconds",
                    default=5,
                    min=5,
                    max=8,
                    step=1,
                    display_mode=comfy_io.NumberDisplay.number,
                    tooltip="Duration of the output video in seconds",
                    optional=True,
                ),
                comfy_io.Boolean.Input(
                    "enhance_prompt",
                    default=True,
                    tooltip="Whether to enhance the prompt with AI assistance",
                    optional=True,
                ),
                comfy_io.Combo.Input(
                    "person_generation",
                    options=["ALLOW", "BLOCK"],
                    default="ALLOW",
                    tooltip="Whether to allow generating people in the video",
                    optional=True,
                ),
                comfy_io.Int.Input(
                    "seed",
                    default=0,
                    min=0,
                    max=0xFFFFFFFF,
                    step=1,
                    display_mode=comfy_io.NumberDisplay.number,
                    control_after_generate=True,
                    tooltip="Seed for video generation (0 for random)",
                    optional=True,
                ),
                comfy_io.Image.Input(
                    "image",
                    tooltip="Optional reference image to guide video generation",
                    optional=True,
                ),
                comfy_io.Combo.Input(
                    "model",
                    options=["veo-2.0-generate-001"],
                    default="veo-2.0-generate-001",
                    tooltip="Veo 2 model to use for video generation",
                    optional=True,
                ),
            ],
            outputs=[
                comfy_io.Video.Output(),
            ],
            hidden=[
                comfy_io.Hidden.auth_token_comfy_org,
                comfy_io.Hidden.api_key_comfy_org,
                comfy_io.Hidden.unique_id,
            ],
            is_api_node=True,
        )

    @staticmethod
    def _raise_if_failed(poll_response) -> None:
        """Raise when a finished poll response carries an error or filtered output.

        Raises:
            Exception: If ``poll_response.error`` is set, or if
                ``response.raiMediaFilteredCount`` is greater than zero.
        """
        error = getattr(poll_response, "error", None)
        if error:
            error_message = f"Veo API error: {error.message} (code: {error.code})"
            logging.error(error_message)
            raise Exception(error_message)

        result = poll_response.response
        # The attribute can exist while holding None (NOTE(review): presumably an
        # unset optional field — confirm against the response model); comparing
        # None > 0 would raise TypeError, so treat missing/None as zero.
        filtered_count = getattr(result, "raiMediaFilteredCount", None) or 0
        if filtered_count > 0:
            reasons = getattr(result, "raiMediaFilteredReasons", None)
            if reasons:
                error_message = f"Content filtered by Google's Responsible AI practices: {reasons[0]} ({filtered_count} videos filtered.)"
            else:
                error_message = f"Content filtered by Google's Responsible AI practices ({filtered_count} videos filtered.)"
            logging.error(error_message)
            raise Exception(error_message)

    @staticmethod
    async def _read_video_bytes(poll_response) -> bytes:
        """Return the raw bytes of the first video in a finished poll response.

        Inline base64 data is preferred; otherwise the video is downloaded
        from its ``gcsUri``.

        Raises:
            Exception: If no video is listed, the video has neither inline data
                nor a URI, or the resulting payload is empty.
            aiohttp.ClientResponseError: If the download answers with an HTTP
                error status.
        """
        result = poll_response.response
        videos = getattr(result, "videos", None) if result else None
        if not videos:
            raise Exception("Video generation completed but no video was returned")

        video = videos[0]
        inline_data = getattr(video, "bytesBase64Encoded", None)
        if inline_data:
            video_data = base64.b64decode(inline_data)
        elif getattr(video, "gcsUri", None):
            async with aiohttp.ClientSession() as session:
                async with session.get(video.gcsUri) as video_response:
                    # Without this check an HTTP error body would be handed
                    # downstream as if it were video data.
                    video_response.raise_for_status()
                    video_data = await video_response.content.read()
        else:
            raise Exception("Video returned but no data or URL was provided")

        if not video_data:
            raise Exception("No video data was returned")
        return video_data

    @classmethod
    async def execute(
        cls,
        prompt,
        aspect_ratio="16:9",
        negative_prompt="",
        duration_seconds=5,
        enhance_prompt=True,
        person_generation="ALLOW",
        seed=0,
        image=None,
        model="veo-2.0-generate-001",
        generate_audio=False,
    ):
        """Start a Veo generation job, poll it to completion and return the video.

        Args:
            prompt: Text description of the video.
            aspect_ratio: Sent as ``aspectRatio``.
            negative_prompt: Sent as ``negativePrompt`` only when non-empty.
            duration_seconds: Sent as ``durationSeconds``.
            enhance_prompt: Sent as ``enhancePrompt``.
            person_generation: Sent as ``personGeneration``.
            seed: Sent as ``seed`` only when greater than zero.
            image: Optional reference image; sent base64-encoded with
                mime type ``image/png``.
            model: Model name, interpolated into the proxy endpoint paths.
            generate_audio: Sent as ``generateAudio`` only when ``model``
                contains ``"veo-3.0"``.

        Returns:
            ``comfy_io.NodeOutput`` wrapping a ``VideoFromFile`` built from the
            in-memory video bytes.

        Raises:
            Exception: If the API reports an error, the content was filtered,
                or no usable video data was returned.
            aiohttp.ClientResponseError: If downloading the video fails with an
                HTTP error status.
        """
        # Build the single request instance, attaching the image when present.
        instance = {"prompt": prompt}
        if image is not None:
            image_base64 = convert_image_to_base64(image)
            if image_base64:
                instance["image"] = {
                    "bytesBase64Encoded": image_base64,
                    "mimeType": "image/png",
                }
        instances = [instance]

        parameters = {
            "aspectRatio": aspect_ratio,
            "personGeneration": person_generation,
            "durationSeconds": duration_seconds,
            "enhancePrompt": enhance_prompt,
        }
        # Optional parameters are only sent when they carry a value.
        if negative_prompt:
            parameters["negativePrompt"] = negative_prompt
        if seed > 0:
            parameters["seed"] = seed
        # Only add generateAudio for Veo 3 models
        if "veo-3.0" in model:
            parameters["generateAudio"] = generate_audio

        auth = {
            "auth_token": cls.hidden.auth_token_comfy_org,
            "comfy_api_key": cls.hidden.api_key_comfy_org,
        }

        # Initial request to start video generation
        initial_operation = SynchronousOperation(
            endpoint=ApiEndpoint(
                path=f"/proxy/veo/{model}/generate",
                method=HttpMethod.POST,
                request_model=VeoGenVidRequest,
                response_model=VeoGenVidResponse,
            ),
            request=VeoGenVidRequest(
                instances=instances,
                parameters=parameters,
            ),
            auth_kwargs=auth,
        )
        initial_response = await initial_operation.execute()
        operation_name = initial_response.name
        logging.info(f"Veo generation started with operation name: {operation_name}")

        def status_extractor(response):
            # Report "completed" as soon as the operation is done, regardless of
            # success or failure; errors are inspected after polling finishes.
            return "completed" if response.done else "pending"

        def progress_extractor(response):
            # Could be enhanced if the API provides progress information
            return None

        poll_operation = PollingOperation(
            poll_endpoint=ApiEndpoint(
                path=f"/proxy/veo/{model}/poll",
                method=HttpMethod.POST,
                request_model=VeoGenVidPollRequest,
                response_model=VeoGenVidPollResponse,
            ),
            completed_statuses=["completed"],
            failed_statuses=[],  # No failed statuses, we'll handle errors after polling
            status_extractor=status_extractor,
            progress_extractor=progress_extractor,
            request=VeoGenVidPollRequest(
                operationName=operation_name,
            ),
            auth_kwargs=auth,
            poll_interval=5.0,
            result_url_extractor=get_video_url_from_response,
            node_id=cls.hidden.unique_id,
            estimated_duration=AVERAGE_DURATION_VIDEO_GEN,
        )
        poll_response = await poll_operation.execute()

        # Polling treats every finished operation as "completed", so failures
        # have to be detected on the final response here.
        cls._raise_if_failed(poll_response)
        video_data = await cls._read_video_bytes(poll_response)
        logging.info("Video generation completed successfully")

        # Wrap the bytes in a file-like object for VideoFromFile.
        video_io = BytesIO(video_data)
        return comfy_io.NodeOutput(VideoFromFile(video_io))
class Veo3VideoGenerationNode(VeoVideoGenerationNode):
    """
    Generates videos from text prompts using Google's Veo 3 API.

    Supported models:
    - veo-3.0-generate-001
    - veo-3.0-fast-generate-001

    This node extends the base Veo node with Veo 3 specific features including
    audio generation and fixed 8-second duration.
    """

    # define_schema receives the class itself (`cls`), so it must be a classmethod.
    @classmethod
    def define_schema(cls):
        """Return the schema for the Veo 3 node.

        Compared with the Veo 2 schema, the duration is pinned to 8 seconds
        (min == max == 8), the model choices are the Veo 3 models, and a
        ``generate_audio`` boolean input is added.
        """
        return comfy_io.Schema(
            node_id="Veo3VideoGenerationNode",
            display_name="Google Veo 3 Video Generation",
            category="api node/video/Veo",
            description="Generates videos from text prompts using Google's Veo 3 API",
            inputs=[
                comfy_io.String.Input(
                    "prompt",
                    multiline=True,
                    default="",
                    tooltip="Text description of the video",
                ),
                comfy_io.Combo.Input(
                    "aspect_ratio",
                    options=["16:9", "9:16"],
                    default="16:9",
                    tooltip="Aspect ratio of the output video",
                ),
                comfy_io.String.Input(
                    "negative_prompt",
                    multiline=True,
                    default="",
                    tooltip="Negative text prompt to guide what to avoid in the video",
                    optional=True,
                ),
                comfy_io.Int.Input(
                    "duration_seconds",
                    default=8,
                    min=8,
                    max=8,
                    step=1,
                    display_mode=comfy_io.NumberDisplay.number,
                    tooltip="Duration of the output video in seconds (Veo 3 only supports 8 seconds)",
                    optional=True,
                ),
                comfy_io.Boolean.Input(
                    "enhance_prompt",
                    default=True,
                    tooltip="Whether to enhance the prompt with AI assistance",
                    optional=True,
                ),
                comfy_io.Combo.Input(
                    "person_generation",
                    options=["ALLOW", "BLOCK"],
                    default="ALLOW",
                    tooltip="Whether to allow generating people in the video",
                    optional=True,
                ),
                comfy_io.Int.Input(
                    "seed",
                    default=0,
                    min=0,
                    max=0xFFFFFFFF,
                    step=1,
                    display_mode=comfy_io.NumberDisplay.number,
                    control_after_generate=True,
                    tooltip="Seed for video generation (0 for random)",
                    optional=True,
                ),
                comfy_io.Image.Input(
                    "image",
                    tooltip="Optional reference image to guide video generation",
                    optional=True,
                ),
                comfy_io.Combo.Input(
                    "model",
                    options=["veo-3.0-generate-001", "veo-3.0-fast-generate-001"],
                    default="veo-3.0-generate-001",
                    tooltip="Veo 3 model to use for video generation",
                    optional=True,
                ),
                comfy_io.Boolean.Input(
                    "generate_audio",
                    default=False,
                    tooltip="Generate audio for the video. Supported by all Veo 3 models.",
                    optional=True,
                ),
            ],
            outputs=[
                comfy_io.Video.Output(),
            ],
            hidden=[
                comfy_io.Hidden.auth_token_comfy_org,
                comfy_io.Hidden.api_key_comfy_org,
                comfy_io.Hidden.unique_id,
            ],
            is_api_node=True,
        )
class VeoExtension(ComfyExtension):
    """Extension that lists the Veo video generation nodes defined in this module."""

    @override
    async def get_node_list(self) -> list[type[comfy_io.ComfyNode]]:
        """Return the node classes provided by this extension."""
        return [
            VeoVideoGenerationNode,
            Veo3VideoGenerationNode,
        ]
async def comfy_entrypoint() -> VeoExtension:
    """Create and return a new ``VeoExtension`` instance."""
    extension = VeoExtension()
    return extension