"""Set up and launch the Microsoft VibeVoice Gradio demo.

Clones the VibeVoice repository, installs it in editable mode (plus the
``spaces`` library when targeting Hugging Face ZeroGPU), patches the bundled
Gradio demo so its model-loading code matches the chosen execution
environment, and finally launches the demo with a public share link.
"""

import os
import subprocess
import sys
from pathlib import Path

# --- 0. Hardcoded Toggle for Execution Environment ---
# Set this to True to use Hugging Face ZeroGPU (recommended)
# Set this to False to use the slower, pure CPU environment
USE_ZEROGPU = True


def _run_checked(cmd, description):
    """Run *cmd*, exiting the script with an error message on failure.

    Centralizes the try/run/except pattern that was previously duplicated
    for every subprocess call. *description* is interpolated into the
    error message (e.g. "cloning repository").
    """
    try:
        subprocess.run(cmd, check=True, capture_output=True, text=True)
    except subprocess.CalledProcessError as e:
        print(f"Error {description}: {e.stderr}")
        sys.exit(1)


# --- 1. Clone the VibeVoice Repository ---
repo_dir = "VibeVoice"
if not os.path.exists(repo_dir):
    print("Cloning the VibeVoice repository...")
    _run_checked(
        ["git", "clone", "https://github.com/microsoft/VibeVoice.git"],
        "cloning repository",
    )
    print("Repository cloned successfully.")
else:
    print("Repository already exists. Skipping clone.")

# --- 2. Install Dependencies ---
os.chdir(repo_dir)
print(f"Changed directory to: {os.getcwd()}")

# Install the main package into the current interpreter's environment.
print("Installing the VibeVoice package...")
_run_checked(
    [sys.executable, "-m", "pip", "install", "-e", "."],
    "installing package",
)
print("Package installed successfully.")

# Install 'spaces' if using ZeroGPU, as it's required for the decorator
if USE_ZEROGPU:
    print("Installing the 'spaces' library for ZeroGPU...")
    _run_checked(
        [sys.executable, "-m", "pip", "install", "spaces"],
        "installing 'spaces' library",
    )
    print("'spaces' library installed successfully.")

# --- 3. Modify the demo script based on the toggle ---
demo_script_path = Path("demo/gradio_demo.py")
print(f"Reading {demo_script_path}...")

try:
    file_content = demo_script_path.read_text()

    # The original GPU-specific model loading block we want to replace.
    # It is problematic because it hardcodes FlashAttention, which is
    # unavailable on CPU and not guaranteed on ZeroGPU hardware.
    # NOTE(review): the exact indentation must match the upstream file for
    # str.replace to fire — the not-found warning below catches drift.
    original_block = """        self.model = VibeVoiceForConditionalGenerationInference.from_pretrained(
            self.model_path,
            torch_dtype=torch.bfloat16,
            device_map='cuda',
            attn_implementation="flash_attention_2",
        )"""

    if USE_ZEROGPU:
        print("Optimizing for ZeroGPU execution...")
        # New block for ZeroGPU: drop the flash_attention line; transformers
        # automatically picks the best available attention implementation.
        replacement_block = """        self.model = VibeVoiceForConditionalGenerationInference.from_pretrained(
            self.model_path,
            torch_dtype=torch.bfloat16,
            device_map='cuda',
        )"""

        modified_content = file_content
        # Add 'import spaces' at the top of the file (idempotent on re-runs,
        # so a second invocation doesn't stack duplicate import lines).
        if not modified_content.startswith("import spaces"):
            modified_content = "import spaces\n" + modified_content
        # Decorate the main class with @spaces.GPU to request a GPU
        # (again guarded so re-running doesn't stack decorators).
        if "@spaces.GPU" not in modified_content:
            modified_content = modified_content.replace(
                "class VibeVoiceGradioInterface:",
                "@spaces.GPU(duration=120)\nclass VibeVoiceGradioInterface:",
            )
        environment_label = "ZeroGPU"
    else:
        # Pure CPU execution
        print("Modifying for pure CPU execution...")
        # New block for CPU: float32 and an explicit CPU device map;
        # FlashAttention is not compatible with CPU.
        replacement_block = """        self.model = VibeVoiceForConditionalGenerationInference.from_pretrained(
            self.model_path,
            torch_dtype=torch.float32,  # Use float32 for CPU
            device_map="cpu",
        )"""
        modified_content = file_content
        environment_label = "CPU"

    # Replace the model loading block, warning (instead of silently doing
    # nothing) when the expected upstream text isn't found.
    if original_block in modified_content:
        modified_content = modified_content.replace(original_block, replacement_block)
    else:
        print(
            "Warning: expected model-loading block not found in "
            f"{demo_script_path}; leaving model loading unchanged."
        )
    print(f"Script modified for {environment_label} successfully.")

    # Write the modified content back to the file
    demo_script_path.write_text(modified_content)
except Exception as e:
    print(f"An error occurred while modifying the script: {e}")
    sys.exit(1)

# --- 4. Launch the Gradio Demo ---
model_id = "microsoft/VibeVoice-1.5B"

# Use sys.executable (not a bare "python") so the demo runs in the same
# interpreter/venv the packages were just installed into.
command = [
    sys.executable,
    str(demo_script_path),
    "--model_path", model_id,
    "--share",
]

print(f"Launching Gradio demo with command: {' '.join(command)}")
subprocess.run(command)