VibeVoice-demo-dev

Paused

App Files Files Community

broadfield-dev committed on Aug 26

Commit

c5f9e51

verified ·

1 Parent(s): 88cdeea

Update app.py

Browse files

Files changed (1) hide show

app.py +52 -49

app.py CHANGED Viewed

@@ -34,16 +34,55 @@ except subprocess.CalledProcessError as e:
     print(f"Error installing package: {e.stderr}")
     sys.exit(1)
-# --- 3. Refactor the demo script using a robust state-machine patcher ---
 demo_script_path = Path("demo/gradio_demo.py")
 print(f"Refactoring {demo_script_path} for ZeroGPU lazy loading...")
 try:
     with open(demo_script_path, 'r') as f:
-        lines = f.readlines()
-    # --- Prepare the code blocks to be inserted ---
-    lazy_load_code = """
         # Patched: Lazy-load model and processor on the GPU worker
         if self.model is None or self.processor is None:
             print("Loading processor & model for the first time on GPU worker...")
@@ -55,61 +94,25 @@ try:
             )
             self.model.eval()
             self.model.model.noise_scheduler = self.model.model.noise_scheduler.from_config(
-                self.model.model.noise_scheduler.config,
                 algorithm_type='sde-dpmsolver++',
                 beta_schedule='squaredcos_cap_v2'
             )
             self.model.set_ddpm_inference_steps(num_steps=self.inference_steps)
             print("Model and processor loaded successfully on GPU worker.")
-"""
-    # --- Perform the line-by-line modifications using a state machine ---
-    new_lines = []
-    # Add 'import spaces' at the top if it doesn't exist
-    if not any("import spaces" in line for line in lines):
-        new_lines.append("import spaces\n")
-    # State machine variables
-    in_generate_method = False
-    patched_generate_method = False
-    for line in lines:
-        # Defer the initial model loading to prevent PicklingError
-        if "self.load_model()" in line and "def __init__" in "".join(lines[lines.index(line)-2:lines.index(line)]):
-            new_lines.append("        # self.load_model() # Patched: Defer model loading\n")
-            new_lines.append("        self.model = None\n")
-            new_lines.append("        self.processor = None\n")
-            print("Successfully patched __init__ to prevent startup model load.")
-        # Start of the target method
-        elif "def generate_podcast_streaming(self," in line and not patched_generate_method:
-            new_lines.append("    @spaces.GPU(duration=120)\n")
-            new_lines.append(line)
-            in_generate_method = True
-        # End of the target method signature
-        elif "-> Iterator[tuple]:" in line and in_generate_method:
-            new_lines.append(line)
-            # Indent and insert the lazy load code
-            for code_line in lazy_load_code.strip().split('\n'):
-                new_lines.append(' ' * 8 + code_line + '\n')
-            # Reset state
-            in_generate_method = False
-            patched_generate_method = True
-            print("Successfully patched generation method for lazy loading.")
-        # All other lines
-        else:
-            new_lines.append(line)
-    if not patched_generate_method:
-        print("\033[91mError: Failed to apply the lazy-loading patch. The target method signature may have changed.\033[0m")
         sys.exit(1)
     # --- Write the modified content back to the file ---
     with open(demo_script_path, 'w') as f:
-        f.writelines(new_lines)
     print("Script patching complete.")

     print(f"Error installing package: {e.stderr}")
     sys.exit(1)
+# --- 3. Refactor the demo script using a direct replacement strategy ---
 demo_script_path = Path("demo/gradio_demo.py")
 print(f"Refactoring {demo_script_path} for ZeroGPU lazy loading...")
 try:
     with open(demo_script_path, 'r') as f:
+        modified_content = f.read()
+    # --- Add 'import spaces' at the top ---
+    if "import spaces" not in modified_content:
+        modified_content = "import spaces\n" + modified_content
+    # --- Patch 1: Defer model loading in __init__ ---
+    original_init_call = "        self.load_model()"
+    replacement_init_block = (
+        "        # self.load_model() # Patched: Defer model loading\n"
+        "        self.model = None\n"
+        "        self.processor = None"
+    )
+    if original_init_call in modified_content:
+        modified_content = modified_content.replace(original_init_call, replacement_init_block, 1)
+        print("Successfully patched __init__ to prevent startup model load.")
+    else:
+        print(f"\033[91mError: Could not find '{original_init_call}' to patch. Startup patch failed.\033[0m")
+        sys.exit(1)
+    # --- Patch 2: Add decorator and lazy-loading logic to the generation method ---
+    # Define the exact block to find, spanning the full method signature down to the 'try:'.
+    # This is sensitive to whitespace but is the most direct way to replace.
+    original_method_header = """    def generate_podcast_streaming(self,
+                                 num_speakers: int,
+                                 script: str,
+                                 speaker_1: str = None,
+                                 speaker_2: str = None,
+                                 speaker_3: str = None,
+                                 speaker_4: str = None,
+                                 cfg_scale: float = 1.3) -> Iterator[tuple]:
+        try:"""
+    # Define the full replacement block with correct indentation.
+    replacement_method_header = """    @spaces.GPU(duration=120)
+    def generate_podcast_streaming(self,
+                                 num_speakers: int,
+                                 script: str,
+                                 speaker_1: str = None,
+                                 speaker_2: str = None,
+                                 speaker_3: str = None,
+                                 speaker_4: str = None,
+                                 cfg_scale: float = 1.3) -> Iterator[tuple]:
         # Patched: Lazy-load model and processor on the GPU worker
         if self.model is None or self.processor is None:
             print("Loading processor & model for the first time on GPU worker...")
             )
             self.model.eval()
             self.model.model.noise_scheduler = self.model.model.noise_scheduler.from_config(
+                self.model.model.noise_scheduler.config,
                 algorithm_type='sde-dpmsolver++',
                 beta_schedule='squaredcos_cap_v2'
             )
             self.model.set_ddpm_inference_steps(num_steps=self.inference_steps)
             print("Model and processor loaded successfully on GPU worker.")
+        try:"""
+    if original_method_header in modified_content:
+        modified_content = modified_content.replace(original_method_header, replacement_method_header, 1)
+        print("Successfully patched generation method for lazy loading.")
+    else:
+        print(f"\033[91mError: Could not find the method definition for 'generate_podcast_streaming' to patch. This is likely due to a whitespace mismatch. Please check the demo script.\033[0m")
         sys.exit(1)
     # --- Write the modified content back to the file ---
     with open(demo_script_path, 'w') as f:
+        f.write(modified_content)
     print("Script patching complete.")