VibeVoice-demo-dev

Running on Zero

App Files Files Community

broadfield-dev committed 21 days ago

Commit

c1b8cf8

verified ·

1 Parent(s): b4b995a

Update app.py

Browse files

Files changed (1) hide show

app.py +39 -46

app.py CHANGED Viewed

@@ -4,8 +4,8 @@ import sys
 from pathlib import Path
 # --- 0. Hardcoded Toggle for Execution Environment ---
-# Set this to True to use Hugging Face ZeroGPU
-# Set this to False to use a pure CPU environment
 USE_ZEROGPU = True
 # --- 1. Clone the VibeVoice Repository ---
@@ -44,12 +44,12 @@ except subprocess.CalledProcessError as e:
     print(f"Error installing package: {e.stderr}")
     sys.exit(1)
-# Install 'spaces' if using ZeroGPU
 if USE_ZEROGPU:
     print("Installing the 'spaces' library for ZeroGPU...")
     try:
         subprocess.run(
-            [sys.executable, "-m", "pip", "install", "huggingface-hub", "gradio", "spaces"],
             check=True,
             capture_output=True,
             text=True
@@ -59,74 +59,68 @@ if USE_ZEROGPU:
         print(f"Error installing 'spaces' library: {e.stderr}")
         sys.exit(1)
 # --- 3. Modify the demo script based on the toggle ---
 demo_script_path = Path("demo/gradio_demo.py")
 print(f"Reading {demo_script_path}...")
 try:
     file_content = demo_script_path.read_text()
-    if USE_ZEROGPU:
-        print("Optimizing for ZeroGPU execution...")
-        # Ensure the original GPU block is present
-        original_block = """        self.model = VibeVoiceForConditionalGenerationInference.from_pretrained(
             self.model_path,
             torch_dtype=torch.bfloat16,
             device_map='cuda',
             attn_implementation="flash_attention_2",
         )"""
-        if original_block in file_content:
-            # Add 'import spaces' at the beginning of the file
-            modified_content = "import spaces\n" + file_content
-            # Decorate the model loading and generation functions with @spaces.GPU
-            # This is a robust way to ensure both setup and inference get GPU access
-            modified_content = modified_content.replace(
-                "class VibeVoiceGradioInterface:",
-                "@spaces.GPU\nclass VibeVoiceGradioInterface:"
-            )
-            print("Script modified for ZeroGPU successfully.")
-            # Write the modified content back to the file
-            demo_script_path.write_text(modified_content)
-        else:
-            print("Warning: Original GPU-specific model loading block not found. The script might have been updated. Proceeding with potential ZeroGPU compatibility.")
-    else:
-        print("Modifying for CPU execution...")
-        # Define the original GPU-specific model loading block
-        original_block = """        self.model = VibeVoiceForConditionalGenerationInference.from_pretrained(
             self.model_path,
             torch_dtype=torch.bfloat16,
             device_map='cuda',
-            attn_implementation="flash_attention_2",
         )"""
-        # Define the new CPU-compatible block
-        replacement_block = """        self.model = VibeVoiceForConditionalGenerationInference.from_pretrained(
             self.model_path,
             torch_dtype=torch.float32,  # Use float32 for CPU
             device_map="cpu",
         )"""
-        # Replace the entire block
-        if original_block in file_content:
-            modified_content = file_content.replace(original_block, replacement_block)
-            # Write the modified content back to the file
-            demo_script_path.write_text(modified_content)
-            print("Script modified for CPU successfully.")
-        else:
-            print("Warning: GPU-specific model loading block not found. The script might have been updated. Proceeding without modification.")
 except Exception as e:
     print(f"An error occurred while modifying the script: {e}")
     sys.exit(1)
 # --- 4. Launch the Gradio Demo ---
 model_id = "microsoft/VibeVoice-1.5B"
@@ -140,5 +134,4 @@ command = [
 ]
 print(f"Launching Gradio demo with command: {' '.join(command)}")
-# This command will start the Gradio server
 subprocess.run(command)

 from pathlib import Path
 # --- 0. Hardcoded Toggle for Execution Environment ---
+# Set this to True to use Hugging Face ZeroGPU (recommended)
+# Set this to False to use the slower, pure CPU environment
 USE_ZEROGPU = True
 # --- 1. Clone the VibeVoice Repository ---
     print(f"Error installing package: {e.stderr}")
     sys.exit(1)
+# Install 'spaces' if using ZeroGPU, as it's required for the decorator
 if USE_ZEROGPU:
     print("Installing the 'spaces' library for ZeroGPU...")
     try:
         subprocess.run(
+            [sys.executable, "-m", "pip", "install", "spaces"],
             check=True,
             capture_output=True,
             text=True
         print(f"Error installing 'spaces' library: {e.stderr}")
         sys.exit(1)
 # --- 3. Modify the demo script based on the toggle ---
 demo_script_path = Path("demo/gradio_demo.py")
 print(f"Reading {demo_script_path}...")
 try:
     file_content = demo_script_path.read_text()
+    # Define the original GPU-specific model loading block we want to replace
+    # This block is problematic because it hardcodes FlashAttention
+    original_block = """        self.model = VibeVoiceForConditionalGenerationInference.from_pretrained(
             self.model_path,
             torch_dtype=torch.bfloat16,
             device_map='cuda',
             attn_implementation="flash_attention_2",
         )"""
+    if USE_ZEROGPU:
+        print("Optimizing for ZeroGPU execution...")
+        # New block for ZeroGPU: We remove the problematic flash_attention line.
+        # Transformers will automatically use the best available attention mechanism.
+        replacement_block_gpu = """        self.model = VibeVoiceForConditionalGenerationInference.from_pretrained(
             self.model_path,
             torch_dtype=torch.bfloat16,
             device_map='cuda',
         )"""
+        # Add 'import spaces' at the beginning of the file
+        modified_content = "import spaces\n" + file_content
+        # Decorate the main class with @spaces.GPU to request a GPU
+        modified_content = modified_content.replace(
+            "class VibeVoiceGradioInterface:",
+            "@spaces.GPU(duration=120)\nclass VibeVoiceGradioInterface:"
+        )
+        # Replace the model loading block
+        modified_content = modified_content.replace(original_block, replacement_block_gpu)
+        print("Script modified for ZeroGPU successfully.")
+    else: # Pure CPU execution
+        print("Modifying for pure CPU execution...")
+        # New block for CPU: Use float32 and map directly to CPU.
+        # FlashAttention is not compatible with CPU.
+        replacement_block_cpu = """        self.model = VibeVoiceForConditionalGenerationInference.from_pretrained(
             self.model_path,
             torch_dtype=torch.float32,  # Use float32 for CPU
             device_map="cpu",
         )"""
+        # Replace the model loading block
+        modified_content = file_content.replace(original_block, replacement_block_cpu)
+        print("Script modified for CPU successfully.")
+    # Write the modified content back to the file
+    demo_script_path.write_text(modified_content)
 except Exception as e:
     print(f"An error occurred while modifying the script: {e}")
     sys.exit(1)
 # --- 4. Launch the Gradio Demo ---
 model_id = "microsoft/VibeVoice-1.5B"
 ]
 print(f"Launching Gradio demo with command: {' '.join(command)}")
 subprocess.run(command)