"""Bootstrap and launch the Microsoft VibeVoice Gradio demo.

Clones the VibeVoice repository, installs the package in editable mode,
patches ``demo/gradio_demo.py`` for the selected execution environment
(Hugging Face ZeroGPU or pure CPU), and finally launches the demo.
"""

import os
import subprocess
import sys
from pathlib import Path

# --- 0. Hardcoded Toggle for Execution Environment ---
# True  -> use Hugging Face ZeroGPU (recommended)
# False -> use the slower, pure-CPU environment
USE_ZEROGPU = True

REPO_URL = "https://github.com/microsoft/VibeVoice.git"
REPO_DIR = "VibeVoice"
DEMO_SCRIPT = Path("demo/gradio_demo.py")
MODEL_ID = "microsoft/VibeVoice-1.5B"


def _run_or_exit(cmd, error_prefix):
    """Run *cmd*; on failure print its stderr prefixed by *error_prefix* and exit."""
    try:
        subprocess.run(cmd, check=True, capture_output=True, text=True)
    except subprocess.CalledProcessError as e:
        print(f"{error_prefix}: {e.stderr}")
        sys.exit(1)


def _clone_repo():
    """Step 1: clone the VibeVoice repository unless it already exists."""
    if os.path.exists(REPO_DIR):
        print("Repository already exists. Skipping clone.")
        return
    print("Cloning the VibeVoice repository...")
    _run_or_exit(["git", "clone", REPO_URL], "Error cloning repository")
    print("Repository cloned successfully.")


def _install_package():
    """Step 2: install the VibeVoice package in editable mode.

    Note: other dependencies are installed via requirements.txt.
    """
    os.chdir(REPO_DIR)
    print(f"Changed directory to: {os.getcwd()}")
    print("Installing the VibeVoice package in editable mode...")
    # Use the running interpreter so the install lands in the same
    # environment this script (and the launched demo) will use.
    _run_or_exit(
        [sys.executable, "-m", "pip", "install", "-e", "."],
        "Error installing package",
    )
    print("Package installed successfully.")


def _original_model_loading_block():
    """Return the upstream model-loading block we expect to patch.

    Built from a list of lines so the indentation is explicit and robust
    (avoids issues with indentation in multi-line string literals).

    NOTE(review): the leading whitespace must match the upstream demo
    byte-for-byte (method-body indentation assumed here) -- verify against
    demo/gradio_demo.py if the patch stops matching.
    """
    return "\n".join([
        "        self.model = VibeVoiceForConditionalGenerationInference.from_pretrained(",
        "            self.model_path,",
        "            torch_dtype=torch.bfloat16,",
        "            device_map='cuda',",
        '            attn_implementation="flash_attention_2",',
        "        )",
    ])


def _apply_zerogpu_patch(content, original_block):
    """Return *content* patched for Hugging Face ZeroGPU execution."""
    print("Optimizing for ZeroGPU execution...")
    # New block for ZeroGPU: we remove the problematic
    # `attn_implementation` line and keep the rest of the call unchanged.
    replacement_block = "\n".join([
        "        self.model = VibeVoiceForConditionalGenerationInference.from_pretrained(",
        "            self.model_path,",
        "            torch_dtype=torch.bfloat16,",
        "            device_map='cuda',",
        "        )",
    ])

    # 'import spaces' is required for the @spaces.GPU decorator.
    if "import spaces" not in content:
        content = "import spaces\n" + content

    # Request a GPU from the Spaces infrastructure.
    # NOTE(review): spaces.GPU is documented for functions/methods;
    # decorating the whole class may not be honored by ZeroGPU --
    # confirm against the Hugging Face Spaces documentation.
    if "@spaces.GPU" not in content:
        content = content.replace(
            "class VibeVoiceDemo:",
            "@spaces.GPU(duration=120)\nclass VibeVoiceDemo:",
        )

    content = content.replace(original_block, replacement_block)
    print("Script modified for ZeroGPU successfully.")
    return content


def _apply_cpu_patch(content, original_block):
    """Return *content* patched for pure-CPU execution."""
    print("Modifying for pure CPU execution...")
    # New block for CPU: use float32 and map directly to the CPU.
    replacement_block = "\n".join([
        "        self.model = VibeVoiceForConditionalGenerationInference.from_pretrained(",
        "            self.model_path,",
        "            torch_dtype=torch.float32, # Use float32 for CPU",
        '            device_map="cpu",',
        "        )",
    ])
    content = content.replace(original_block, replacement_block)
    print("Script modified for CPU successfully.")
    return content


def _patch_demo_script():
    """Step 3: rewrite the demo's model-loading block for this environment."""
    print(f"Reading {DEMO_SCRIPT} to apply environment-specific modifications...")
    try:
        file_content = DEMO_SCRIPT.read_text()
        original_block = _original_model_loading_block()

        # Fail loudly (with guidance) if the upstream code has drifted.
        if original_block not in file_content:
            print("\033[91mError: The original code block to be patched was not found.\033[0m")
            print("The demo script may have changed, or there might be a whitespace mismatch.")
            print("Please verify the contents of demo/gradio_demo.py.")
            sys.exit(1)

        if USE_ZEROGPU:
            modified_content = _apply_zerogpu_patch(file_content, original_block)
        else:
            modified_content = _apply_cpu_patch(file_content, original_block)

        # Write the dynamically modified content back to the demo file.
        DEMO_SCRIPT.write_text(modified_content)
    except Exception as e:  # SystemExit above is not caught (BaseException)
        print(f"An error occurred while modifying the script: {e}")
        sys.exit(1)


def _launch_demo():
    """Step 4: launch the patched Gradio demo (blocks until it exits)."""
    command = [
        # Fixed: was a bare "python", which may resolve to a different
        # interpreter than the one VibeVoice was just installed into.
        sys.executable,
        str(DEMO_SCRIPT),
        "--model_path",
        MODEL_ID,
        "--share",
    ]
    print(f"Launching Gradio demo with command: {' '.join(command)}")
    subprocess.run(command)


def main():
    """Run all setup steps, then start the demo."""
    _clone_repo()
    _install_package()
    _patch_demo_script()
    _launch_demo()


if __name__ == "__main__":
    main()