Commit 883eb72
Parent(s): 3bdc963
Fix for #22
docs/vms/Investigating issues in production.md
ADDED
@@ -0,0 +1,55 @@
# Investigating issues in production

Note: the following document is meant for maintainers of VMS.

It describes things that are normally only useful during development, for instance when there are bugs.

Normal VMS users do not need to read this document or perform these steps, since in theory VMS already takes care of things (e.g. automatic fixes of corrupted internal JSON files) or provides ways to solve common issues (e.g. buttons to download or delete data).

## Backing up data

During development of VMS, there might be bugs that corrupt data.

To avoid resetting the /data folder all the time, I suggest simply backing up what you need.

1. run the space in developer mode
2. open VS Code from the dev mode panel
3. go to the data dir (e.g. /data if you have persistence, or .data otherwise); see the quick check below
4. do whatever you want (you are the developer, after all)
5. for instance you can edit files, delete stuff, etc.

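For example, here is a quick way to check which data directory is in use from the VS Code terminal (a minimal sketch based on the two locations mentioned in step 3):

```bash
# Use /data when persistent storage is mounted, fall back to .data otherwise
if [ -d /data ]; then DATA_DIR=/data; else DATA_DIR=.data; fi
echo "Using data dir: $DATA_DIR"
ls -lah "$DATA_DIR"
```
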
### Manual backup of the output dir

```bash
mkdir /data/backup

# to copy training data
cp -r /data/training /data/backup/training

# to copy generated models and checkpoints
cp -r /data/output /data/backup/output
```

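If you prefer to keep several snapshots instead of overwriting a single backup folder, a timestamped variant of the same commands works too (purely illustrative):

```bash
# One backup folder per run, named after the current date and time
STAMP=$(date +%Y%m%d-%H%M%S)
mkdir -p "/data/backup/$STAMP"
cp -r /data/training "/data/backup/$STAMP/training"
cp -r /data/output "/data/backup/$STAMP/output"
```
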
### Manual restore of a backup

```bash
# if you already have a backup of output/ then you can delete its content
rm -Rf output/*

# restore the backup, for instance the weights and checkpoints
cp -r backup/output/* output/
```

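As an optional sanity check, you can compare the restored folder against the backup (run from the same directory as the commands above):

```bash
# diff -r prints nothing and exits 0 when the two trees are identical
diff -r backup/output output && echo "restore matches backup"
```
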
### Manual restore of UI state

Restoring the UI state can be tricky, as it is being modified by Gradio.

I recommend shutting Gradio down, but this will kill the space and the VS Code session.

So a trick is to restart Gradio and immediately run this command:

```bash
cp backup/output/ui_state.json output/
```

That way Gradio will initialize itself with the backed-up UI state.
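Since the timing matters here, a small helper that repeats the copy for a few seconds around the restart can save you from racing Gradio by hand (a rough sketch, run from the same directory as above):

```bash
# Re-copy the backed-up UI state for ~10 seconds while Gradio restarts,
# so the fresh process picks up the backup instead of writing a new default
for _ in $(seq 1 10); do
  cp backup/output/ui_state.json output/ 2>/dev/null
  sleep 1
done
```
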
vms/ui/project/services/previewing.py
CHANGED
@@ -69,7 +69,7 @@ class PreviewingService:
        num_frames: int,
        guidance_scale: float,
        flow_shift: float,
-
+        lora_scale: float,
        inference_steps: int,
        seed: int = -1,
        enable_cpu_offload: bool = True,
@@ -87,15 +87,15 @@ class PreviewingService:
        # Return updated log string for UI updates
        return "\n".join(log_messages)

-        # Find latest LoRA weights if
+        # Find latest LoRA weights if lora_scale > 0
        lora_path = None
-        using_lora =
+        using_lora = lora_scale > 0

        if using_lora:
            lora_path = self.find_latest_lora_weights()
            if not lora_path:
                return None, "Error: No LoRA weights found", log("Error: No LoRA weights found in output directory")
-            log(f"Using LoRA weights with
+            log(f"Using LoRA weights with scale {lora_scale}")
        else:
            log("Using original model without LoRA weights")

@@ -155,7 +155,7 @@ class PreviewingService:
        if using_lora and lora_path:
            log(f"Using LoRA weights from: {lora_path}")
        log(f"Resolution: {width}x{height}, Frames: {num_frames}, FPS: {fps}")
-        log(f"Guidance Scale: {guidance_scale}, Flow Shift: {flow_shift}, LoRA
+        log(f"Guidance Scale: {guidance_scale}, Flow Shift: {flow_shift}, LoRA Scale: {lora_scale if using_lora else 0}")
        log(f"Generation Seed: {seed}")
        #log(f"Prompt: {full_prompt}")
        #log(f"Negative Prompt: {negative_prompt}")
@@ -164,21 +164,21 @@ class PreviewingService:
        if internal_model_type == "wan":
            return self.generate_wan_video(
                full_prompt, negative_prompt, width, height, num_frames,
-                guidance_scale, flow_shift, lora_path,
+                guidance_scale, flow_shift, lora_path, lora_scale,
                inference_steps, seed, enable_cpu_offload, fps, log,
                model_version, conditioning_image
            )
        elif internal_model_type == "ltx_video":
            return self.generate_ltx_video(
                full_prompt, negative_prompt, width, height, num_frames,
-                guidance_scale, flow_shift, lora_path,
+                guidance_scale, flow_shift, lora_path, lora_scale,
                inference_steps, seed, enable_cpu_offload, fps, log,
                model_version, conditioning_image
            )
        elif internal_model_type == "hunyuan_video":
            return self.generate_hunyuan_video(
                full_prompt, negative_prompt, width, height, num_frames,
-                guidance_scale, flow_shift, lora_path,
+                guidance_scale, flow_shift, lora_path, lora_scale,
                inference_steps, seed, enable_cpu_offload, fps, log,
                model_version, conditioning_image
            )
@@ -199,7 +199,7 @@ class PreviewingService:
        guidance_scale: float,
        flow_shift: float,
        lora_path: str,
-
+        lora_scale: float,
        inference_steps: int,
        seed: int = -1,
        enable_cpu_offload: bool = True,
@@ -257,11 +257,9 @@ class PreviewingService:
        pipe.enable_model_cpu_offload()

        # Apply LoRA weights if using them
-        if
-            log_fn(f"Loading LoRA weights from {lora_path} with
+        if lora_scale > 0 and lora_path:
+            log_fn(f"Loading LoRA weights from {lora_path} with lora scale {lora_scale}...")
            pipe.load_lora_weights(lora_path)
-            # TODO: Set the lora scale directly instead of using fuse_lora
-            #pipe._lora_scale = lora_weight
        else:
            log_fn("Using base model without LoRA weights")

@@ -290,6 +288,7 @@ class PreviewingService:
                num_frames=num_frames,
                guidance_scale=guidance_scale,
                num_inference_steps=inference_steps,
+                cross_attention_kwargs={"scale": lora_scale},
                generator=generator,
            ).frames[0]
        else:
@@ -339,7 +338,7 @@ class PreviewingService:
        guidance_scale: float,
        flow_shift: float,
        lora_path: str,
-
+        lora_scale: float,
        inference_steps: int,
        seed: int = -1,
        enable_cpu_offload: bool = True,
@@ -385,10 +384,9 @@ class PreviewingService:
        pipe.enable_model_cpu_offload()

        # Apply LoRA weights if using them
-        if
-            log_fn(f"Loading LoRA weights from {lora_path} with
+        if lora_scale > 0 and lora_path:
+            log_fn(f"Loading LoRA weights from {lora_path} with lora scale {lora_scale}...")
            pipe.load_lora_weights(lora_path)
-            pipe.fuse_lora(lora_weight)
        else:
            log_fn("Using base model without LoRA weights")

@@ -410,6 +408,7 @@ class PreviewingService:
            decode_timestep=0.03,
            decode_noise_scale=0.025,
            num_inference_steps=inference_steps,
+            cross_attention_kwargs={"scale": lora_scale},
            generator=generator,
        ).frames[0]

@@ -446,7 +445,7 @@ class PreviewingService:
        guidance_scale: float,
        flow_shift: float,
        lora_path: str,
-
+        lora_scale: float,
        inference_steps: int,
        seed: int = -1,
        enable_cpu_offload: bool = True,
@@ -506,10 +505,9 @@ class PreviewingService:
        pipe.enable_model_cpu_offload()

        # Apply LoRA weights if using them
-        if
-            log_fn(f"Loading LoRA weights from {lora_path} with
+        if lora_scale > 0 and lora_path:
+            log_fn(f"Loading LoRA weights from {lora_path} with lora scale {lora_scale}...")
            pipe.load_lora_weights(lora_path)
-            pipe.fuse_lora(lora_weight)
        else:
            log_fn("Using base model without LoRA weights")

@@ -532,6 +530,7 @@ class PreviewingService:
            guidance_scale=guidance_scale,
            true_cfg_scale=1.0,
            num_inference_steps=inference_steps,
+            cross_attention_kwargs={"scale": lora_scale},
            generator=generator,
        ).frames[0]

vms/ui/project/tabs/preview_tab.py
CHANGED
@@ -164,8 +164,8 @@ class PreviewTab(BaseTab):
        )

        with gr.Row():
-            self.components["
-                label="LoRA
+            self.components["lora_scale"] = gr.Slider(
+                label="LoRA Scale",
                minimum=0.0,
                maximum=1.0,
                step=0.01,
@@ -236,7 +236,7 @@ class PreviewTab(BaseTab):
        is_using_lora = "Use LoRA model" in use_lora_value

        return {
-            self.components["
+            self.components["lora_scale"]: gr.Slider(visible=is_using_lora)
        }

    def get_model_version_choices(self, model_type: str) -> List[str]:
@@ -379,7 +379,7 @@ class PreviewTab(BaseTab):
        self.components["use_lora"].change(
            fn=self.update_lora_ui,
            inputs=[self.components["use_lora"]],
-            outputs=[self.components["
+            outputs=[self.components["lora_scale"]]
        )

        # Load preview UI state when the tab is selected
@@ -397,7 +397,7 @@ class PreviewTab(BaseTab):
            self.components["fps"],
            self.components["guidance_scale"],
            self.components["flow_shift"],
-            self.components["
+            self.components["lora_scale"],
            self.components["inference_steps"],
            self.components["enable_cpu_offload"],
            self.components["model_version"],
@@ -410,7 +410,7 @@ class PreviewTab(BaseTab):
        for component_name in [
            "prompt", "negative_prompt", "prompt_prefix", "model_version", "resolution_preset",
            "width", "height", "num_frames", "fps", "guidance_scale", "flow_shift",
-            "
+            "lora_scale", "inference_steps", "enable_cpu_offload", "seed", "use_lora"
        ]:
            if component_name in self.components:
                self.components[component_name].change(
@@ -433,7 +433,7 @@ class PreviewTab(BaseTab):
            self.components["num_frames"],
            self.components["guidance_scale"],
            self.components["flow_shift"],
-            self.components["
+            self.components["lora_scale"],
            self.components["inference_steps"],
            self.components["enable_cpu_offload"],
            self.components["fps"],
@@ -535,7 +535,7 @@ class PreviewTab(BaseTab):
            preview_state.get("fps", 16),
            preview_state.get("guidance_scale", 5.0),
            preview_state.get("flow_shift", 3.0),
-            preview_state.get("
+            preview_state.get("lora_scale", 0.7),
            preview_state.get("inference_steps", 30),
            preview_state.get("enable_cpu_offload", True),
            model_version,
@@ -603,7 +603,7 @@ class PreviewTab(BaseTab):
        num_frames: int,
        guidance_scale: float,
        flow_shift: float,
-
+        lora_scale: float,
        inference_steps: int,
        enable_cpu_offload: bool,
        fps: int,
@@ -635,7 +635,7 @@ class PreviewTab(BaseTab):
            "fps": fps,
            "guidance_scale": guidance_scale,
            "flow_shift": flow_shift,
-            "
+            "lora_scale": lora_scale,
            "inference_steps": inference_steps,
            "enable_cpu_offload": enable_cpu_offload,
            "seed": seed,
@@ -657,8 +657,8 @@ class PreviewTab(BaseTab):
        use_lora_model = use_lora == "Use LoRA model"

        # Start actual generation
-        # If not using LoRA, set
-
+        # If not using LoRA, set lora_scale to 0 to disable it
+        effective_lora_scale = lora_scale if use_lora_model else 0.0

        result = self.app.previewing.generate_video(
            model_type=model_type,
@@ -671,7 +671,7 @@ class PreviewTab(BaseTab):
            num_frames=num_frames,
            guidance_scale=guidance_scale,
            flow_shift=flow_shift,
-
+            lora_scale=effective_lora_scale, # Use 0.0 if not using LoRA
            inference_steps=inference_steps,
            enable_cpu_offload=enable_cpu_offload,
            fps=fps,