broadfield-dev committed on
Commit
fb12e2c
·
verified ·
1 Parent(s): b2be989

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +43 -24
app.py CHANGED
@@ -51,34 +51,50 @@ print(f"Reading {demo_script_path} to apply environment-specific modifications..
51
  try:
52
  file_content = demo_script_path.read_text()
53
 
54
- # Define the original model loading block that we need to replace.
55
- # This block is problematic because it hardcodes FlashAttention.
56
- original_block = """ self.model = VibeVoiceForConditionalGenerationInference.from_pretrained(
57
- self.model_path,
58
- torch_dtype=torch.bfloat16,
59
- device_map='cuda',
60
- attn_implementation="flash_attention_2",
61
- )"""
 
 
 
 
 
 
 
 
 
 
62
 
63
  if USE_ZEROGPU:
64
  print("Optimizing for ZeroGPU execution...")
65
 
66
  # New block for ZeroGPU: We remove the problematic `attn_implementation` line.
67
- # `transformers` will automatically use the best available attention mechanism.
68
- replacement_block_gpu = """ self.model = VibeVoiceForConditionalGenerationInference.from_pretrained(
69
- self.model_path,
70
- torch_dtype=torch.bfloat16,
71
- device_map='cuda',
72
- )"""
 
 
73
 
74
  # Add 'import spaces' at the beginning of the file for the @spaces.GPU decorator
75
- modified_content = "import spaces\n" + file_content
 
 
 
76
 
77
  # Decorate the main interface class to request a GPU from the Spaces infrastructure
78
- modified_content = modified_content.replace(
79
- "class VibeVoiceGradioInterface:",
80
- "@spaces.GPU(duration=120)\nclass VibeVoiceGradioInterface:"
81
- )
 
82
 
83
  # Replace the model loading block
84
  modified_content = modified_content.replace(original_block, replacement_block_gpu)
@@ -88,11 +104,14 @@ try:
88
  print("Modifying for pure CPU execution...")
89
 
90
  # New block for CPU: Use float32 and map directly to the CPU.
91
- replacement_block_cpu = """ self.model = VibeVoiceForConditionalGenerationInference.from_pretrained(
92
- self.model_path,
93
- torch_dtype=torch.float32, # Use float32 for CPU
94
- device_map="cpu",
95
- )"""
 
 
 
96
 
97
  # Replace the original model loading block with the CPU version
98
  modified_content = file_content.replace(original_block, replacement_block_cpu)
 
51
  try:
52
  file_content = demo_script_path.read_text()
53
 
54
+ # Define the original model loading block using a list of lines for robustness.
55
+ # This avoids issues with indentation in multi-line string literals.
56
+ original_lines = [
57
+ ' self.model = VibeVoiceForConditionalGenerationInference.from_pretrained(',
58
+ ' self.model_path,',
59
+ ' torch_dtype=torch.bfloat16,',
60
+ " device_map='cuda',",
61
+ ' attn_implementation="flash_attention_2",',
62
+ ' )'
63
+ ]
64
+ original_block = "\n".join(original_lines)
65
+
66
+ # Check if the block to be patched exists in the file
67
+ if original_block not in file_content:
68
+ print("\033[91mError: The original code block to be patched was not found.\033[0m")
69
+ print("The demo script may have changed, or there might be a whitespace mismatch.")
70
+ print("Please verify the contents of demo/gradio_demo.py.")
71
+ sys.exit(1)
72
 
73
  if USE_ZEROGPU:
74
  print("Optimizing for ZeroGPU execution...")
75
 
76
  # New block for ZeroGPU: We remove the problematic `attn_implementation` line.
77
+ replacement_lines_gpu = [
78
+ ' self.model = VibeVoiceForConditionalGenerationInference.from_pretrained(',
79
+ ' self.model_path,',
80
+ ' torch_dtype=torch.bfloat16,',
81
+ " device_map='cuda',",
82
+ ' )'
83
+ ]
84
+ replacement_block_gpu = "\n".join(replacement_lines_gpu)
85
 
86
  # Add 'import spaces' at the beginning of the file for the @spaces.GPU decorator
87
+ if "import spaces" not in file_content:
88
+ modified_content = "import spaces\n" + file_content
89
+ else:
90
+ modified_content = file_content
91
 
92
  # Decorate the main interface class to request a GPU from the Spaces infrastructure
93
+ if "@spaces.GPU" not in modified_content:
94
+ modified_content = modified_content.replace(
95
+ "class VibeVoiceDemo:",
96
+ "@spaces.GPU(duration=120)\nclass VibeVoiceDemo:"
97
+ )
98
 
99
  # Replace the model loading block
100
  modified_content = modified_content.replace(original_block, replacement_block_gpu)
 
104
  print("Modifying for pure CPU execution...")
105
 
106
  # New block for CPU: Use float32 and map directly to the CPU.
107
+ replacement_lines_cpu = [
108
+ ' self.model = VibeVoiceForConditionalGenerationInference.from_pretrained(',
109
+ ' self.model_path,',
110
+ ' torch_dtype=torch.float32, # Use float32 for CPU',
111
+ ' device_map="cpu",',
112
+ ' )'
113
+ ]
114
+ replacement_block_cpu = "\n".join(replacement_lines_cpu)
115
 
116
  # Replace the original model loading block with the CPU version
117
  modified_content = file_content.replace(original_block, replacement_block_cpu)