# File size: 11,046 Bytes
# dc24492
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
import os
import base64
import io
import requests
from typing import Dict, Any, Optional, List
from PIL import Image
import numpy as np

class AIImageVideoPipeline:
    """
    Comprehensive AI-powered Image-to-Video Generation Pipeline

    ## Workflow Stages
    1. Initial Image Generation
    2. Iterative Outpainting
    3. LTX Video Transformation

    ## Technical Architecture
    - Modular design with configurable components
    - Support for multiple AI inference endpoints
    - API failures are re-raised as RuntimeError with the original cause chained
    """

    # Pixel modes Pillow's JPEG writer accepts as-is. Any other mode
    # (RGBA, LA, P, ...) is converted to RGB before JPEG encoding.
    _JPEG_SAFE_MODES = frozenset({"1", "L", "RGB", "RGBX", "CMYK", "YCbCr"})

    # Seconds to wait for an API response; None means wait indefinitely.
    # Kept as a class-level default so instances always have the attribute.
    request_timeout: Optional[float] = None

    def __init__(
        self,
        image_generation_endpoint: Optional[str] = None,
        outpainting_endpoint: Optional[str] = None,
        ltx_video_endpoint: Optional[str] = None,
        api_token: Optional[str] = None,
        request_timeout: Optional[float] = None
    ):
        """
        Initialize the AI Image-to-Video pipeline.

        Each endpoint falls back to an environment variable when the
        argument is not given: IMAGE_GEN_ENDPOINT, OUTPAINTING_ENDPOINT,
        LTX_VIDEO_ENDPOINT. The token falls back to HF_API_TOKEN.

        Args:
            image_generation_endpoint (str): Endpoint for initial image generation
            outpainting_endpoint (str): Endpoint for image outpainting
            ltx_video_endpoint (str): Endpoint for LTX video generation
            api_token (str): Authentication token for API calls
            request_timeout (float, optional): Seconds to wait for each API
                response. None (default) waits indefinitely.

        Raises:
            ValueError: If any endpoint is missing after the fallback
        """
        self.endpoints = {
            'image_gen': image_generation_endpoint or os.getenv('IMAGE_GEN_ENDPOINT'),
            'outpainting': outpainting_endpoint or os.getenv('OUTPAINTING_ENDPOINT'),
            'ltx_video': ltx_video_endpoint or os.getenv('LTX_VIDEO_ENDPOINT')
        }
        self.api_token = api_token or os.getenv('HF_API_TOKEN')
        self.request_timeout = request_timeout

        # Fail fast: a missing endpoint would otherwise only surface on first use
        self._validate_endpoints()

    def _validate_endpoints(self):
        """
        Validate configured API endpoints.

        Raises:
            ValueError: If any required endpoint is missing
        """
        missing_endpoints = [
            key for key, value in self.endpoints.items()
            if not value
        ]

        if missing_endpoints:
            raise ValueError(
                f"Missing API endpoints: {', '.join(missing_endpoints)}. "
                "Please configure via parameters or environment variables."
            )

    def encode_image(
        self,
        image: Image.Image,
        format: str = 'JPEG'
    ) -> str:
        """
        Encode PIL Image to base64 data URI.

        When the target format is JPEG and the image is in a mode the JPEG
        writer cannot store (for example RGBA), the image is converted to
        RGB first instead of failing inside ``Image.save``.

        Args:
            image (Image.Image): Input image
            format (str): Output image format

        Returns:
            str: Base64 encoded data URI
        """
        if format.upper() == 'JPEG' and image.mode not in self._JPEG_SAFE_MODES:
            image = image.convert('RGB')

        img_byte_arr = io.BytesIO()
        image.save(img_byte_arr, format=format)
        base64_encoded = base64.b64encode(img_byte_arr.getvalue()).decode('utf-8')
        return f"data:image/{format.lower()};base64,{base64_encoded}"

    def generate_initial_image(
        self,
        prompt: str,
        width: int = 768,
        height: int = 480
    ) -> Image.Image:
        """
        Generate initial image using text prompt.

        Args:
            prompt (str): Image generation prompt
            width (int): Image width
            height (int): Image height

        Returns:
            Image.Image: Generated image

        Raises:
            RuntimeError: If the API call fails
            ValueError: If the response carries no image
        """
        payload = {
            "inputs": prompt,
            "parameters": {
                "width": width,
                "height": height
            }
        }

        response = self._make_api_call(
            self.endpoints['image_gen'],
            payload
        )

        return self._decode_image_response(response)

    def iterative_outpainting(
        self,
        image: Image.Image,
        prompt: str,
        iterations: int = 3,
        padding_size: int = 256
    ) -> Image.Image:
        """
        Perform iterative outpainting to expand image.

        Each iteration pastes the current image into the centre of a
        transparent canvas that is ``padding_size`` pixels larger on every
        side, then sends the canvas and a border mask to the outpainting
        endpoint.

        Args:
            image (Image.Image): Starting image
            prompt (str): Outpainting generation prompt
            iterations (int): Number of outpainting steps
            padding_size (int): Padding size for each iteration

        Returns:
            Image.Image: Final outpainted image

        Raises:
            ValueError: If padding_size is negative or a response has no image
            RuntimeError: If an API call fails
        """
        if padding_size < 0:
            raise ValueError("padding_size must be non-negative")

        current_image = image.copy()

        for _ in range(iterations):
            # Create padded image
            padded_size = (
                current_image.width + 2 * padding_size,
                current_image.height + 2 * padding_size
            )
            padded_image = Image.new('RGBA', padded_size, (0, 0, 0, 0))
            padded_image.paste(
                current_image,
                (padding_size, padding_size)
            )

            # Create mask for padding regions
            mask = self._create_padding_mask(padded_image, padding_size)

            # Outpainting request. PNG is used for both images: the canvas is
            # RGBA, which JPEG cannot store, and the mask must stay exactly
            # 0/255, which lossy JPEG compression would not guarantee.
            payload = {
                "inputs": prompt,
                "image": self.encode_image(padded_image, format='PNG'),
                "mask_image": self.encode_image(mask, format='PNG')
            }

            response = self._make_api_call(
                self.endpoints['outpainting'],
                payload
            )

            current_image = self._decode_image_response(response)

        return current_image

    @staticmethod
    def _padding_mask_array(
        width: int,
        height: int,
        padding_size: int
    ) -> np.ndarray:
        """
        Build the mask pixel grid: 255 on the border band, 0 inside.

        Args:
            width (int): Mask width in pixels
            height (int): Mask height in pixels
            padding_size (int): Thickness of the white border band

        Returns:
            np.ndarray: uint8 array of shape (height, width)

        Raises:
            ValueError: If padding_size is negative
        """
        if padding_size < 0:
            raise ValueError("padding_size must be non-negative")

        mask_array = np.zeros((height, width), dtype=np.uint8)

        # A zero padding must be skipped: the slice [-0:] selects the whole
        # axis, which would turn the entire mask white.
        if padding_size > 0:
            mask_array[:padding_size, :] = 255  # Top
            mask_array[-padding_size:, :] = 255  # Bottom
            mask_array[:, :padding_size] = 255  # Left
            mask_array[:, -padding_size:] = 255  # Right

        return mask_array

    def _create_padding_mask(
        self,
        image: Image.Image,
        padding_size: int
    ) -> Image.Image:
        """
        Generate a mask indicating padding regions.

        Args:
            image (Image.Image): Source image
            padding_size (int): Size of padding

        Returns:
            Image.Image: Mask image (mode 'L'; padding white, interior black)

        Raises:
            ValueError: If padding_size is negative
        """
        width, height = image.size
        return Image.fromarray(
            self._padding_mask_array(width, height, padding_size)
        )

    def generate_ltx_video(
        self,
        image: Image.Image,
        prompt: str = "",
        video_config: Optional[Dict[str, Any]] = None
    ) -> Dict[str, Any]:
        """
        Generate video using LTX video generation API.

        Args:
            image (Image.Image): Input image
            prompt (str, optional): Optional video generation prompt
            video_config (Dict, optional): Custom video generation parameters;
                keys given here override the defaults below

        Returns:
            Dict: API response containing video generation details

        Raises:
            RuntimeError: If the API call fails
        """
        default_config = {
            "width": 768,
            "height": 480,
            "num_frames": 129,  # 8*16 + 1
            "num_inference_steps": 50,
            "guidance_scale": 4.0,
            "double_num_frames": True,
            "fps": 60,
            "super_resolution": True,
            "grain_amount": 12
        }

        # Merge default and custom configurations
        config = {**default_config, **(video_config or {})}

        payload = {
            "inputs": {
                "image": self.encode_image(image),
                # Tolerate None so the payload always carries a string
                "prompt": prompt or ""
            },
            "parameters": config
        }

        return self._make_api_call(
            self.endpoints['ltx_video'],
            payload
        )

    def _make_api_call(
        self,
        endpoint: str,
        payload: Dict[str, Any]
    ) -> Dict[str, Any]:
        """
        Execute API request with error handling.

        Args:
            endpoint (str): API endpoint URL
            payload (Dict): Request payload

        Returns:
            Dict: API response

        Raises:
            RuntimeError: If the request fails, the server answers with an
                error status, or the body is not valid JSON. The underlying
                exception is chained as ``__cause__``.
        """
        headers = {
            "Content-Type": "application/json",
            "Accept": "application/json"
        }
        # Only send credentials when a token is configured; otherwise the
        # header would carry the literal text "Bearer None".
        if self.api_token:
            headers["Authorization"] = f"Bearer {self.api_token}"

        try:
            response = requests.post(
                endpoint,
                headers=headers,
                json=payload,
                timeout=self.request_timeout
            )
            response.raise_for_status()
            return response.json()

        # ValueError covers JSON decoding failures on requests versions whose
        # decode error does not derive from RequestException.
        except (requests.RequestException, ValueError) as e:
            raise RuntimeError(f"API call failed: {e}") from e

    @staticmethod
    def _extract_base64_payload(image_field: str) -> str:
        """
        Strip an optional ``data:...;base64,`` prefix from an image string.

        Args:
            image_field (str): Data URI or bare base64 text

        Returns:
            str: The text after the first comma, or the whole input when
                there is no comma
        """
        head, separator, tail = image_field.partition(",")
        return tail if separator else head

    def _decode_image_response(
        self,
        response: Dict[str, Any]
    ) -> Image.Image:
        """
        Decode image from API response.

        Accepts either a data URI or bare base64 text under the 'image' key.

        Args:
            response (Dict): API response

        Returns:
            Image.Image: Decoded image

        Raises:
            ValueError: If the response has no 'image' entry or it is not a string
        """
        if 'image' not in response:
            raise ValueError("No image found in API response")

        image_field = response['image']
        if not isinstance(image_field, str):
            raise ValueError("Image field in API response is not a string")

        image_data = self._extract_base64_payload(image_field)
        image_bytes = base64.b64decode(image_data)
        return Image.open(io.BytesIO(image_bytes))

    def full_pipeline(
        self,
        initial_prompt: str,
        outpainting_prompt: Optional[str] = None,
        video_prompt: Optional[str] = None
    ) -> Dict[str, Any]:
        """
        Execute complete image-to-video pipeline.

        Args:
            initial_prompt (str): Prompt for initial image generation
            outpainting_prompt (str, optional): Prompt for image expansion;
                outpainting is skipped when empty or None
            video_prompt (str, optional): Prompt for video generation

        Returns:
            Dict: Pipeline execution results with keys 'initial_image',
                'expanded_image' and 'video_response'
        """
        # 1. Generate Initial Image
        initial_image = self.generate_initial_image(initial_prompt)

        # 2. Outpainting (optional)
        if outpainting_prompt:
            expanded_image = self.iterative_outpainting(
                initial_image,
                outpainting_prompt
            )
        else:
            expanded_image = initial_image

        # 3. Video Generation (None is normalised to an empty prompt)
        video_response = self.generate_ltx_video(
            expanded_image,
            video_prompt or ""
        )

        return {
            "initial_image": initial_image,
            "expanded_image": expanded_image,
            "video_response": video_response
        }

def main():
    """
    Run the whole image-to-video pipeline once as a demo.

    Builds a pipeline from placeholder endpoints and token, runs all three
    stages with sample prompts, and writes the two intermediate images to
    the working directory. Any failure while running or saving is reported
    on stdout rather than raised.
    """
    demo_pipeline = AIImageVideoPipeline(
        image_generation_endpoint="YOUR_IMAGE_GEN_ENDPOINT",
        outpainting_endpoint="YOUR_OUTPAINTING_ENDPOINT",
        ltx_video_endpoint="YOUR_LTX_VIDEO_ENDPOINT",
        api_token="YOUR_HF_API_TOKEN"
    )

    prompts = {
        "initial_prompt": "Serene landscape with mountains and a lake",
        "outpainting_prompt": "Expand the scene with more natural elements",
        "video_prompt": "Smooth camera pan across the landscape",
    }

    # Result key -> output file, in the order the images are written
    image_outputs = (
        ("initial_image", "initial_image.png"),
        ("expanded_image", "expanded_image.png"),
    )

    try:
        outputs = demo_pipeline.full_pipeline(**prompts)

        # Only the images are persisted; the video response is not used here
        for result_key, file_name in image_outputs:
            outputs[result_key].save(file_name)

        print("Pipeline execution completed successfully!")

    except Exception as e:
        print(f"Pipeline execution failed: {e}")

# Run the demonstration only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()