broadfield-dev committed on
Commit
6a0b1a5
·
verified ·
1 Parent(s): fb12e2c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +53 -36
app.py CHANGED
@@ -49,11 +49,12 @@ demo_script_path = Path("demo/gradio_demo.py")
49
  print(f"Reading {demo_script_path} to apply environment-specific modifications...")
50
 
51
  try:
52
- file_content = demo_script_path.read_text()
53
 
54
- # Define the original model loading block using a list of lines for robustness.
55
- # This avoids issues with indentation in multi-line string literals.
56
- original_lines = [
 
57
  ' self.model = VibeVoiceForConditionalGenerationInference.from_pretrained(',
58
  ' self.model_path,',
59
  ' torch_dtype=torch.bfloat16,',
@@ -61,61 +62,77 @@ try:
61
  ' attn_implementation="flash_attention_2",',
62
  ' )'
63
  ]
64
- original_block = "\n".join(original_lines)
65
 
66
- # Check if the block to be patched exists in the file
67
- if original_block not in file_content:
68
- print("\033[91mError: The original code block to be patched was not found.\033[0m")
69
- print("The demo script may have changed, or there might be a whitespace mismatch.")
70
- print("Please verify the contents of demo/gradio_demo.py.")
71
- sys.exit(1)
 
 
 
 
 
 
72
 
73
  if USE_ZEROGPU:
74
  print("Optimizing for ZeroGPU execution...")
75
 
76
- # New block for ZeroGPU: We remove the problematic `attn_implementation` line.
77
- replacement_lines_gpu = [
 
 
 
 
78
  ' self.model = VibeVoiceForConditionalGenerationInference.from_pretrained(',
79
  ' self.model_path,',
80
  ' torch_dtype=torch.bfloat16,',
81
  " device_map='cuda',",
82
  ' )'
83
  ]
84
- replacement_block_gpu = "\n".join(replacement_lines_gpu)
85
 
86
- # Add 'import spaces' at the beginning of the file for the @spaces.GPU decorator
87
- if "import spaces" not in file_content:
88
- modified_content = "import spaces\n" + file_content
 
 
 
 
 
 
89
  else:
90
- modified_content = file_content
91
-
92
- # Decorate the main interface class to request a GPU from the Spaces infrastructure
93
- if "@spaces.GPU" not in modified_content:
94
- modified_content = modified_content.replace(
95
- "class VibeVoiceDemo:",
96
- "@spaces.GPU(duration=120)\nclass VibeVoiceDemo:"
97
- )
98
-
99
- # Replace the model loading block
100
- modified_content = modified_content.replace(original_block, replacement_block_gpu)
101
- print("Script modified for ZeroGPU successfully.")
102
 
103
  else: # Pure CPU execution
104
  print("Modifying for pure CPU execution...")
105
 
106
- # New block for CPU: Use float32 and map directly to the CPU.
107
- replacement_lines_cpu = [
108
  ' self.model = VibeVoiceForConditionalGenerationInference.from_pretrained(',
109
  ' self.model_path,',
110
  ' torch_dtype=torch.float32, # Use float32 for CPU',
111
  ' device_map="cpu",',
112
  ' )'
113
  ]
114
- replacement_block_cpu = "\n".join(replacement_lines_cpu)
115
 
116
- # Replace the original model loading block with the CPU version
117
- modified_content = file_content.replace(original_block, replacement_block_cpu)
118
- print("Script modified for CPU successfully.")
 
 
 
 
119
 
120
  # Write the dynamically modified content back to the demo file
121
  demo_script_path.write_text(modified_content)
@@ -125,7 +142,7 @@ except Exception as e:
125
  sys.exit(1)
126
 
127
  # --- 4. Launch the Gradio Demo ---
128
- model_id = "microsoft/VibeVoice-1.5B"
129
 
130
  # Construct the command to run the modified demo script
131
  command = [
 
49
  print(f"Reading {demo_script_path} to apply environment-specific modifications...")
50
 
51
  try:
52
+ modified_content = demo_script_path.read_text()
53
 
54
+ # --- Patch Definitions ---
55
+
56
+ # Define the original model loading block to be replaced.
57
+ original_model_lines = [
58
  ' self.model = VibeVoiceForConditionalGenerationInference.from_pretrained(',
59
  ' self.model_path,',
60
  ' torch_dtype=torch.bfloat16,',
 
62
  ' attn_implementation="flash_attention_2",',
63
  ' )'
64
  ]
65
+ original_model_block = "\n".join(original_model_lines)
66
 
67
+ # Define the generation method signature to add the GPU decorator to.
68
+ original_method_lines = [
69
+ ' def generate_podcast_streaming(self, ',
70
+ ' num_speakers: int,',
71
+ ' script: str,',
72
+ ' speaker_1: str = None,',
73
+ ' speaker_2: str = None,',
74
+ ' speaker_3: str = None,',
75
+ ' speaker_4: str = None,',
76
+ ' cfg_scale: float = 1.3) -> Iterator[tuple]:'
77
+ ]
78
+ original_method_signature = "\n".join(original_method_lines)
79
 
80
  if USE_ZEROGPU:
81
  print("Optimizing for ZeroGPU execution...")
82
 
83
+ # Add 'import spaces' if it's not already there.
84
+ if "import spaces" not in modified_content:
85
+ modified_content = "import spaces\n" + modified_content
86
+
87
+ # New block for ZeroGPU model loading: remove `attn_implementation`.
88
+ replacement_model_lines_gpu = [
89
  ' self.model = VibeVoiceForConditionalGenerationInference.from_pretrained(',
90
  ' self.model_path,',
91
  ' torch_dtype=torch.bfloat16,',
92
  " device_map='cuda',",
93
  ' )'
94
  ]
95
+ replacement_model_block_gpu = "\n".join(replacement_model_lines_gpu)
96
 
97
+ # Add the @spaces.GPU decorator to the generation method instead of the class.
98
+ replacement_method_signature_gpu = "@spaces.GPU(duration=120)\n" + original_method_signature
99
+
100
+ # --- Apply Patches for GPU ---
101
+
102
+ # Patch 1: Decorate the generation method
103
+ if original_method_signature in modified_content:
104
+ modified_content = modified_content.replace(original_method_signature, replacement_method_signature_gpu)
105
+ print("Successfully applied GPU decorator to the generation method.")
106
  else:
107
+ print("\033[91mWarning: Could not find the generation method signature to apply the GPU decorator.\033[0m")
108
+
109
+ # Patch 2: Modify the model loading
110
+ if original_model_block in modified_content:
111
+ modified_content = modified_content.replace(original_model_block, replacement_model_block_gpu)
112
+ print("Successfully patched the model loading block for ZeroGPU.")
113
+ else:
114
+ print("\033[91mWarning: The original model loading block was not found. Patching may have failed.\033[0m")
 
 
 
 
115
 
116
  else: # Pure CPU execution
117
  print("Modifying for pure CPU execution...")
118
 
119
+ # New block for CPU: Use float32 and map to CPU.
120
+ replacement_model_lines_cpu = [
121
  ' self.model = VibeVoiceForConditionalGenerationInference.from_pretrained(',
122
  ' self.model_path,',
123
  ' torch_dtype=torch.float32, # Use float32 for CPU',
124
  ' device_map="cpu",',
125
  ' )'
126
  ]
127
+ replacement_model_block_cpu = "\n".join(replacement_model_lines_cpu)
128
 
129
+ # Apply patch for CPU
130
+ if original_model_block in modified_content:
131
+ modified_content = modified_content.replace(original_model_block, replacement_model_block_cpu)
132
+ print("Script modified for CPU successfully.")
133
+ else:
134
+ print("\033[91mWarning: The original model loading block was not found. Patching may have failed.\033[0m")
135
+
136
 
137
  # Write the dynamically modified content back to the demo file
138
  demo_script_path.write_text(modified_content)
 
142
  sys.exit(1)
143
 
144
  # --- 4. Launch the Gradio Demo ---
145
+ model_id = "microsoft/VibeVoice-1.5B"
146
 
147
  # Construct the command to run the modified demo script
148
  command = [