	Fix `improve_texture` and hide video-to-video
app.py CHANGED
@@ -131,7 +131,7 @@ def calculate_new_dimensions(orig_w, orig_h):
 
 def get_duration(prompt, negative_prompt, input_image_filepath, input_video_filepath,
                  height_ui, width_ui, mode,
-                 duration_ui, ui_steps,
+                 duration_ui, # Removed ui_steps
                  ui_frames_to_use,
                  seed_ui, randomize_seed, ui_guidance_scale, improve_texture_flag,
                  progress):
@@ -143,7 +143,7 @@ def get_duration(prompt, negative_prompt, input_image_filepath, input_video_file
 @spaces.GPU(duration=get_duration)
 def generate(prompt, negative_prompt, input_image_filepath, input_video_filepath,
              height_ui, width_ui, mode,
-             duration_ui, ui_steps,
+             duration_ui, # Removed ui_steps
              ui_frames_to_use,
              seed_ui, randomize_seed, ui_guidance_scale, improve_texture_flag,
              progress=gr.Progress(track_tqdm=True)):
@@ -245,12 +245,15 @@ def generate(prompt, negative_prompt, input_image_filepath, input_video_filepath
         multi_scale_pipeline_obj = LTXMultiScalePipeline(pipeline_instance, active_latent_upsampler)
 
         first_pass_args = PIPELINE_CONFIG_YAML.get("first_pass", {}).copy()
-        first_pass_args["guidance_scale"] = float(ui_guidance_scale)
-        first_pass_args["num_inference_steps"] = int(ui_steps)
-
+        first_pass_args["guidance_scale"] = float(ui_guidance_scale) # UI overrides YAML
+        # num_inference_steps will be derived from len(timesteps) in the pipeline
+        first_pass_args.pop("num_inference_steps", None)
+
 
         second_pass_args = PIPELINE_CONFIG_YAML.get("second_pass", {}).copy()
-        second_pass_args["guidance_scale"] = float(ui_guidance_scale)
+        second_pass_args["guidance_scale"] = float(ui_guidance_scale) # UI overrides YAML
+        # num_inference_steps will be derived from len(timesteps) in the pipeline
+        second_pass_args.pop("num_inference_steps", None)
 
         multi_scale_call_kwargs = call_kwargs.copy()
         multi_scale_call_kwargs.update({
@@ -263,8 +266,16 @@ def generate(prompt, negative_prompt, input_image_filepath, input_video_filepath
         result_images_tensor = multi_scale_pipeline_obj(**multi_scale_call_kwargs).images
     else:
         single_pass_call_kwargs = call_kwargs.copy()
-        single_pass_call_kwargs["guidance_scale"] = float(ui_guidance_scale)
-        single_pass_call_kwargs["num_inference_steps"] = int(ui_steps)
+        first_pass_config_from_yaml = PIPELINE_CONFIG_YAML.get("first_pass", {})
+
+        single_pass_call_kwargs["timesteps"] = first_pass_config_from_yaml.get("timesteps")
+        single_pass_call_kwargs["guidance_scale"] = float(ui_guidance_scale) # UI overrides YAML
+        single_pass_call_kwargs["stg_scale"] = first_pass_config_from_yaml.get("stg_scale")
+        single_pass_call_kwargs["rescaling_scale"] = first_pass_config_from_yaml.get("rescaling_scale")
+        single_pass_call_kwargs["skip_block_list"] = first_pass_config_from_yaml.get("skip_block_list")
+
+        # Remove keys that might conflict or are not used in single pass / handled by above
+        single_pass_call_kwargs.pop("num_inference_steps", None)
         single_pass_call_kwargs.pop("first_pass", None)
         single_pass_call_kwargs.pop("second_pass", None)
         single_pass_call_kwargs.pop("downscale_factor", None)
@@ -335,7 +346,7 @@ with gr.Blocks(css=css) as demo:
                 video_n_hidden = gr.Textbox(label="video_n", visible=False, value=None)
                 t2v_prompt = gr.Textbox(label="Prompt", value="A majestic dragon flying over a medieval castle", lines=3)
                 t2v_button = gr.Button("Generate Text-to-Video", variant="primary")
-            with gr.Tab("video-to-video") as video_tab:
+            with gr.Tab("video-to-video", visible=False) as video_tab:
                 image_v_hidden = gr.Textbox(label="image_v", visible=False, value=None)
                 video_v2v = gr.Video(label="Input Video", sources=["upload", "webcam"]) # type defaults to filepath
                 frames_to_use = gr.Slider(label="Frames to use from input video", minimum=9, maximum=MAX_NUM_FRAMES, value=9, step=8, info="Number of initial frames to use for conditioning/transformation. Must be N*8+1.")
@@ -363,8 +374,9 @@ with gr.Blocks(css=css) as demo:
                 randomize_seed_input = gr.Checkbox(label="Randomize Seed", value=False)
             with gr.Row():
                 guidance_scale_input = gr.Slider(label="Guidance Scale (CFG)", minimum=1.0, maximum=10.0, value=PIPELINE_CONFIG_YAML.get("first_pass", {}).get("guidance_scale", 1.0), step=0.1, info="Controls how much the prompt influences the output. Higher values = stronger influence.")
-                default_steps = len(PIPELINE_CONFIG_YAML.get("first_pass", {}).get("timesteps", [1]*7))
-                steps_input = gr.Slider(label="Inference Steps (for first pass if multi-scale)", minimum=1, maximum=30, value=default_steps, step=1, info="Number of denoising steps. More steps can improve quality but increase time. If YAML defines 'timesteps' for a pass, this UI value is ignored for that pass.")
+                # Removed steps_input slider
+                # default_steps = len(PIPELINE_CONFIG_YAML.get("first_pass", {}).get("timesteps", [1]*7))
+                # steps_input = gr.Slider(label="Inference Steps (for first pass if multi-scale)", minimum=1, maximum=30, value=default_steps, step=1, info="Number of denoising steps. More steps can improve quality but increase time. If YAML defines 'timesteps' for a pass, this UI value is ignored for that pass.")
             with gr.Row():
                 height_input = gr.Slider(label="Height", value=512, step=32, minimum=MIN_DIM_SLIDER, maximum=MAX_IMAGE_SIZE, info="Must be divisible by 32.")
                 width_input = gr.Slider(label="Width", value=704, step=32, minimum=MIN_DIM_SLIDER, maximum=MAX_IMAGE_SIZE, info="Must be divisible by 32.")
@@ -436,17 +448,17 @@ with gr.Blocks(css=css) as demo:
     # --- INPUT LISTS (remain the same structurally) ---
     t2v_inputs = [t2v_prompt, negative_prompt_input, image_n_hidden, video_n_hidden,
                   height_input, width_input, gr.State("text-to-video"),
-                  duration_input, gr.State(0), steps_input,
+                  duration_input, gr.State(0), # Removed steps_input
                   seed_input, randomize_seed_input, guidance_scale_input, improve_texture]
 
     i2v_inputs = [i2v_prompt, negative_prompt_input, image_i2v, video_i_hidden,
                   height_input, width_input, gr.State("image-to-video"),
-                  duration_input, gr.State(0), steps_input,
+                  duration_input, gr.State(0), # Removed steps_input
                   seed_input, randomize_seed_input, guidance_scale_input, improve_texture]
 
     v2v_inputs = [v2v_prompt, negative_prompt_input, image_v_hidden, video_v2v,
                   height_input, width_input, gr.State("video-to-video"),
-                  duration_input, frames_to_use, steps_input,
+                  duration_input, frames_to_use, # Removed steps_input
                   seed_input, randomize_seed_input, guidance_scale_input, improve_texture]
 
     t2v_button.click(fn=generate, inputs=t2v_inputs, outputs=[output_video], api_name="text_to_video")
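
Why popping num_inference_steps is safe here: both passes in the YAML config carry an explicit timesteps schedule, and the new comments state that the pipeline derives the step count from len(timesteps). A minimal sketch of that convention follows; the helper and the sample schedule are hypothetical illustrations, not this Space's pipeline code:

# Hypothetical helper illustrating the convention the diff relies on:
# an explicit "timesteps" schedule determines the step count, so a
# separate num_inference_steps value is redundant (or conflicting)
# and gets popped before calling the pipeline.
def resolve_step_count(pass_args: dict, default_steps: int = 7) -> int:
    timesteps = pass_args.get("timesteps")
    if timesteps is not None:
        return len(timesteps)  # explicit schedule wins
    return int(pass_args.get("num_inference_steps", default_steps))

# Made-up schedule, for illustration only:
print(resolve_step_count({"timesteps": [1000, 981, 909, 725, 513], "guidance_scale": 1.0}))  # 5
print(resolve_step_count({"guidance_scale": 1.0}))  # falls back to 7, echoing the [1]*7 default above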
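
On the "hide video-to-video" half of the commit: the tab is hidden with visible=False rather than deleted, so video_v2v, frames_to_use, and the v2v_inputs wiring stay in the app graph and only the UI entry point disappears. A stripped-down sketch of the same pattern, with placeholder components rather than the Space's real ones:

import gradio as gr

with gr.Blocks() as demo:
    with gr.Tab("text-to-video"):
        prompt = gr.Textbox(label="Prompt")
    # Hidden, not removed: components inside still exist and can be
    # referenced in event handlers and input lists.
    with gr.Tab("video-to-video", visible=False) as video_tab:
        video = gr.Video(label="Input Video")

demo.launch()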
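
One subtlety the input-list hunk implies: Gradio maps inputs to the callback's parameters positionally, so steps_input had to be dropped from the same slot of all three lists to keep them aligned with generate's new signature. gr.State(0) remains the placeholder filling the ui_frames_to_use slot for the two tabs that have no frame slider, while the v2v list passes frames_to_use there.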