# Source: Hugging Face Space "K00B404/3Luik" (status at capture: Sleeping)
# File size: 11,046 bytes
# Revision: dc24492

import os
import base64
import io
import requests
from typing import Dict, Any, Optional, List
from PIL import Image
import numpy as np

class AIImageVideoPipeline:
    """
    AI-powered Image-to-Video Generation Pipeline

    ## Workflow Stages
    1. Initial Image Generation
    2. Iterative Outpainting
    3. LTX Video Transformation

    ## Technical Architecture
    - Modular design with configurable components
    - Support for multiple AI inference endpoints
    - Failed or malformed API calls are surfaced as ``RuntimeError``
    """

    # Seconds to wait for an endpoint before giving up. Without a timeout
    # ``requests`` blocks indefinitely on a stalled connection. Override on
    # an instance or subclass if an endpoint needs longer.
    request_timeout: float = 300.0

    # Image modes Pillow's JPEG writer accepts; any other mode (RGBA, LA,
    # P, ...) has to be converted before it can be saved as JPEG.
    _JPEG_MODES = frozenset({"1", "L", "RGB", "CMYK", "YCbCr"})

    def __init__(
        self,
        image_generation_endpoint: Optional[str] = None,
        outpainting_endpoint: Optional[str] = None,
        ltx_video_endpoint: Optional[str] = None,
        api_token: Optional[str] = None
    ):
        """
        Initialize the AI Image-to-Video pipeline.

        Each argument falls back to an environment variable when it is not
        supplied: ``IMAGE_GEN_ENDPOINT``, ``OUTPAINTING_ENDPOINT``,
        ``LTX_VIDEO_ENDPOINT`` and ``HF_API_TOKEN`` respectively.

        Args:
            image_generation_endpoint (str): Endpoint for initial image generation
            outpainting_endpoint (str): Endpoint for image outpainting
            ltx_video_endpoint (str): Endpoint for LTX video generation
            api_token (str): Authentication token for API calls

        Raises:
            ValueError: If any endpoint is still unset after the fallback
        """
        self.endpoints = {
            'image_gen': image_generation_endpoint or os.getenv('IMAGE_GEN_ENDPOINT'),
            'outpainting': outpainting_endpoint or os.getenv('OUTPAINTING_ENDPOINT'),
            'ltx_video': ltx_video_endpoint or os.getenv('LTX_VIDEO_ENDPOINT')
        }
        self.api_token = api_token or os.getenv('HF_API_TOKEN')

        # Validate endpoint configuration
        self._validate_endpoints()

    def _validate_endpoints(self):
        """
        Validate configured API endpoints.

        Raises:
            ValueError: If any required endpoint is missing
        """
        missing_endpoints = [
            key for key, value in self.endpoints.items()
            if not value
        ]

        if missing_endpoints:
            raise ValueError(
                f"Missing API endpoints: {', '.join(missing_endpoints)}. "
                "Please configure via parameters or environment variables."
            )

    def encode_image(
        self,
        image: Image.Image,
        format: str = 'JPEG'
    ) -> str:
        """
        Encode PIL Image to base64 data URI.

        When the target format is JPEG and the image is in a mode the JPEG
        writer cannot store (for example RGBA or P), a converted RGB copy is
        encoded instead; the caller's image is left untouched.

        Args:
            image (Image.Image): Input image
            format (str): Output image format ('JPG' is accepted as an
                alias for 'JPEG')

        Returns:
            str: Base64 encoded data URI
        """
        fmt = format.upper()
        if fmt == 'JPG':
            fmt = 'JPEG'

        # Pillow raises OSError when asked to write e.g. RGBA as JPEG.
        if fmt == 'JPEG' and image.mode not in self._JPEG_MODES:
            image = image.convert('RGB')

        img_byte_arr = io.BytesIO()
        image.save(img_byte_arr, format=fmt)
        base64_encoded = base64.b64encode(img_byte_arr.getvalue()).decode('utf-8')
        return f"data:image/{fmt.lower()};base64,{base64_encoded}"

    def generate_initial_image(
        self,
        prompt: str,
        width: int = 768,
        height: int = 480
    ) -> Image.Image:
        """
        Generate initial image using text prompt.

        Args:
            prompt (str): Image generation prompt
            width (int): Image width
            height (int): Image height

        Returns:
            Image.Image: Generated image

        Raises:
            RuntimeError: If the API call fails
            ValueError: If the response carries no usable image
        """
        payload = {
            "inputs": prompt,
            "parameters": {
                "width": width,
                "height": height
            }
        }

        response = self._make_api_call(
            self.endpoints['image_gen'],
            payload
        )

        return self._decode_image_response(response)

    def iterative_outpainting(
        self,
        image: Image.Image,
        prompt: str,
        iterations: int = 3,
        padding_size: int = 256
    ) -> Image.Image:
        """
        Perform iterative outpainting to expand image.

        Each iteration pastes the current image onto a transparent canvas
        that is ``padding_size`` pixels larger on every side, builds a mask
        for the new border and sends both to the outpainting endpoint. The
        decoded response becomes the input of the next iteration.

        Args:
            image (Image.Image): Starting image
            prompt (str): Outpainting generation prompt
            iterations (int): Number of outpainting steps
            padding_size (int): Padding size for each iteration

        Returns:
            Image.Image: Final outpainted image (a copy of ``image`` when
            ``iterations`` is zero or negative)

        Raises:
            ValueError: If ``padding_size`` is negative, or a response
                carries no usable image
            RuntimeError: If an API call fails
        """
        if padding_size < 0:
            raise ValueError("padding_size must be non-negative")

        current_image = image.copy()

        for _ in range(iterations):
            # Create padded image
            padded_size = (
                current_image.width + 2 * padding_size,
                current_image.height + 2 * padding_size
            )
            padded_image = Image.new('RGBA', padded_size, (0, 0, 0, 0))
            padded_image.paste(
                current_image,
                (padding_size, padding_size)
            )

            # Create mask for padding regions
            mask = self._create_padding_mask(padded_image, padding_size)

            # Outpainting request. PNG is used for both images: the RGBA
            # canvas cannot be written as JPEG at all, and a lossless
            # format keeps the mask strictly 0/255.
            payload = {
                "inputs": prompt,
                "image": self.encode_image(padded_image, format='PNG'),
                "mask_image": self.encode_image(mask, format='PNG')
            }

            response = self._make_api_call(
                self.endpoints['outpainting'],
                payload
            )

            current_image = self._decode_image_response(response)

        return current_image

    @staticmethod
    def _padding_mask_array(size: tuple, padding_size: int) -> np.ndarray:
        """
        Build the raw padding mask as a uint8 array.

        Args:
            size (tuple): ``(width, height)`` of the padded image
            padding_size (int): Width of the border to mark

        Returns:
            np.ndarray: ``(height, width)`` array, 255 on the border and 0
            elsewhere
        """
        width, height = size
        mask_array = np.zeros((height, width), dtype=np.uint8)

        # Guard against zero: ``arr[-0:]`` is the *whole* array, so without
        # this check a zero padding would mark every pixel for repainting.
        if padding_size > 0:
            mask_array[:padding_size, :] = 255  # Top
            mask_array[-padding_size:, :] = 255  # Bottom
            mask_array[:, :padding_size] = 255  # Left
            mask_array[:, -padding_size:] = 255  # Right

        return mask_array

    def _create_padding_mask(
        self,
        image: Image.Image,
        padding_size: int
    ) -> Image.Image:
        """
        Generate a mask indicating padding regions.

        Args:
            image (Image.Image): Source image (only its size is used)
            padding_size (int): Size of padding

        Returns:
            Image.Image: Mode 'L' mask, white (255) on the padding border
            and black (0) over the original content
        """
        return Image.fromarray(
            self._padding_mask_array(image.size, padding_size)
        )

    def generate_ltx_video(
        self,
        image: Image.Image,
        prompt: str = "",
        video_config: Optional[Dict[str, Any]] = None
    ) -> Dict[str, Any]:
        """
        Generate video using LTX video generation API.

        Args:
            image (Image.Image): Input image
            prompt (str, optional): Optional video generation prompt
            video_config (Dict, optional): Custom video generation parameters;
                keys given here override the built-in defaults

        Returns:
            Dict: API response containing video generation details

        Raises:
            RuntimeError: If the API call fails
        """
        default_config = {
            "width": 768,
            "height": 480,
            "num_frames": 129,  # 8*16 + 1
            "num_inference_steps": 50,
            "guidance_scale": 4.0,
            "double_num_frames": True,
            "fps": 60,
            "super_resolution": True,
            "grain_amount": 12
        }

        # Merge default and custom configurations
        config = {**default_config, **(video_config or {})}

        payload = {
            "inputs": {
                "image": self.encode_image(image),
                "prompt": prompt
            },
            "parameters": config
        }

        return self._make_api_call(
            self.endpoints['ltx_video'],
            payload
        )

    def _build_headers(self) -> Dict[str, str]:
        """
        Build the HTTP headers for an API request.

        Returns:
            Dict: JSON content headers, plus a bearer ``Authorization``
            header only when an API token is configured
        """
        headers = {
            "Content-Type": "application/json",
            "Accept": "application/json"
        }
        # Avoid sending the literal string "Bearer None" when no token is set.
        if self.api_token:
            headers["Authorization"] = f"Bearer {self.api_token}"
        return headers

    def _make_api_call(
        self,
        endpoint: str,
        payload: Dict[str, Any]
    ) -> Dict[str, Any]:
        """
        Execute API request with error handling.

        Args:
            endpoint (str): API endpoint URL
            payload (Dict): Request payload

        Returns:
            Dict: API response (the decoded JSON body)

        Raises:
            RuntimeError: If the request fails, times out, returns an error
                status, or the body is not valid JSON. The original
                exception is attached as ``__cause__``.
        """
        try:
            response = requests.post(
                endpoint,
                headers=self._build_headers(),
                json=payload,
                timeout=self.request_timeout
            )
            response.raise_for_status()
            return response.json()

        # ValueError covers a non-JSON body on requests versions whose
        # JSON decode error is not a RequestException subclass.
        except (requests.RequestException, ValueError) as e:
            raise RuntimeError(f"API call failed: {e}") from e

    def _decode_image_response(
        self,
        response: Dict[str, Any]
    ) -> Image.Image:
        """
        Decode image from API response.

        The ``image`` field may be a full data URI
        (``data:image/...;base64,<payload>``) or a bare base64 payload.

        Args:
            response (Dict): API response

        Returns:
            Image.Image: Decoded image

        Raises:
            ValueError: If the response has no ``image`` field, or the field
                is empty or not a string
        """
        if not isinstance(response, dict) or 'image' not in response:
            raise ValueError("No image found in API response")

        encoded = response['image']
        if not isinstance(encoded, str) or not encoded.strip():
            raise ValueError("Image field in API response is empty or not a string")

        # Drop the "data:...;base64," prefix when present; base64 itself
        # never contains a comma, so a missing comma means a bare payload.
        _, separator, payload = encoded.partition(",")
        image_data = payload if separator else encoded

        image_bytes = base64.b64decode(image_data)
        return Image.open(io.BytesIO(image_bytes))

    def full_pipeline(
        self,
        initial_prompt: str,
        outpainting_prompt: Optional[str] = None,
        video_prompt: Optional[str] = None
    ) -> Dict[str, Any]:
        """
        Execute complete image-to-video pipeline.

        Args:
            initial_prompt (str): Prompt for initial image generation
            outpainting_prompt (str, optional): Prompt for image expansion;
                outpainting is skipped when this is empty or None
            video_prompt (str, optional): Prompt for video generation

        Returns:
            Dict: Pipeline execution results with keys ``initial_image``,
            ``expanded_image`` and ``video_response``
        """
        # 1. Generate Initial Image
        initial_image = self.generate_initial_image(initial_prompt)

        # 2. Outpainting (optional)
        if outpainting_prompt:
            expanded_image = self.iterative_outpainting(
                initial_image,
                outpainting_prompt
            )
        else:
            expanded_image = initial_image

        # 3. Video Generation
        # Send an empty prompt rather than a JSON null when none was given.
        video_response = self.generate_ltx_video(
            expanded_image,
            video_prompt or ""
        )

        return {
            "initial_image": initial_image,
            "expanded_image": expanded_image,
            "video_response": video_response
        }

def main():
    """
    Run the pipeline once with placeholder credentials and sample prompts.

    The two generated images are written to the current working directory
    as PNG files. Any exception raised while running the pipeline or saving
    the images is caught and reported on stdout instead of propagating.
    """
    demo_pipeline = AIImageVideoPipeline(
        image_generation_endpoint="YOUR_IMAGE_GEN_ENDPOINT",
        outpainting_endpoint="YOUR_OUTPAINTING_ENDPOINT",
        ltx_video_endpoint="YOUR_LTX_VIDEO_ENDPOINT",
        api_token="YOUR_HF_API_TOKEN"
    )

    # Result key -> file the corresponding image is written to.
    image_outputs = (
        ('initial_image', "initial_image.png"),
        ('expanded_image', "expanded_image.png"),
    )

    try:
        outcome = demo_pipeline.full_pipeline(
            initial_prompt="Serene landscape with mountains and a lake",
            outpainting_prompt="Expand the scene with more natural elements",
            video_prompt="Smooth camera pan across the landscape"
        )

        # Only the images are persisted; the video response is left as-is.
        for result_key, filename in image_outputs:
            outcome[result_key].save(filename)

        print("Pipeline execution completed successfully!")

    except Exception as error:
        print(f"Pipeline execution failed: {error}")


if __name__ == "__main__":
    main()