"""Set up and launch the VibeVoice Gradio demo.

Clones the upstream repository, installs it in editable mode, patches
``demo/gradio_demo.py`` for the selected execution environment (Hugging Face
ZeroGPU or pure CPU), and launches the Gradio app.
"""

import os
import subprocess
import sys
from pathlib import Path

# --- 0. Hardcoded Toggle for Execution Environment ---
# Set this to True to use Hugging Face ZeroGPU (recommended)
# Set this to False to use the slower, pure CPU environment
USE_ZEROGPU = True

REPO_DIR = "VibeVoice"
MODEL_ID = "microsoft/VibeVoice-1.5B"

# Shared error text for both patch branches (red ANSI highlighting).
# NOTE(review): the original CPU branch had a corrupted reset escape
# ("\03-3[0m"); fixed here to the standard "\033[0m".
_BLOCK_NOT_FOUND_MSG = (
    "\033[91mError: The original model loading block was not found. "
    "Patching may have failed.\033[0m"
)


def _clone_repo() -> None:
    """Clone the VibeVoice repository unless it already exists (exits on failure)."""
    if Path(REPO_DIR).exists():
        print("Repository already exists. Skipping clone.")
        return
    print("Cloning the VibeVoice repository...")
    try:
        subprocess.run(
            ["git", "clone", "https://github.com/microsoft/VibeVoice.git"],
            check=True,
            capture_output=True,
            text=True,
        )
    except subprocess.CalledProcessError as e:
        print(f"Error cloning repository: {e.stderr}")
        sys.exit(1)
    print("Repository cloned successfully.")


def _install_package() -> None:
    """chdir into the repo and pip-install it in editable mode (exits on failure).

    Other dependencies are installed via requirements.txt.
    """
    os.chdir(REPO_DIR)
    print(f"Changed directory to: {os.getcwd()}")
    print("Installing the VibeVoice package in editable mode...")
    try:
        # Use the running interpreter's pip so the install lands in this env.
        subprocess.run(
            [sys.executable, "-m", "pip", "install", "-e", "."],
            check=True,
            capture_output=True,
            text=True,
        )
    except subprocess.CalledProcessError as e:
        print(f"Error installing package: {e.stderr}")
        sys.exit(1)
    print("Package installed successfully.")


def _patch_demo_script(demo_script_path: Path) -> None:
    """Rewrite the demo script in place for the environment chosen by USE_ZEROGPU.

    Exits with status 1 (and a red error message) if an expected code block
    cannot be located in the demo script.
    """
    print(f"Reading {demo_script_path} to apply environment-specific modifications...")
    content = demo_script_path.read_text()

    # Original model loading block, built from a list of lines for robustness.
    original_model_block = "\n".join([
        ' self.model = VibeVoiceForConditionalGenerationInference.from_pretrained(',
        ' self.model_path,',
        ' torch_dtype=torch.bfloat16,',
        " device_map='cuda',",
        ' attn_implementation="flash_attention_2",',
        ' )',
    ])
    # Only the first line of the generation method is needed to locate it.
    original_method_signature = " def generate_podcast_streaming(self,"

    if USE_ZEROGPU:
        print("Optimizing for ZeroGPU execution...")
        # Add 'import spaces' if it's not already there.
        if "import spaces" not in content:
            content = "import spaces\n" + content

        # ZeroGPU model loading: same block minus `attn_implementation`.
        replacement_model_block = "\n".join([
            ' self.model = VibeVoiceForConditionalGenerationInference.from_pretrained(',
            ' self.model_path,',
            ' torch_dtype=torch.bfloat16,',
            " device_map='cuda',",
            ' )',
        ])
        # Prepend the @spaces.GPU decorator with matching indentation.
        decorated_signature = " @spaces.GPU(duration=120)\n" + original_method_signature

        # Patch 1: decorate the generation method.
        if original_method_signature not in content:
            print("\033[91mError: Could not find the generation method signature to apply the GPU decorator.\033[0m")
            sys.exit(1)
        content = content.replace(original_method_signature, decorated_signature)
        print("Successfully applied GPU decorator to the generation method.")

        # Patch 2: swap in the ZeroGPU model-loading block.
        if original_model_block not in content:
            print(_BLOCK_NOT_FOUND_MSG)
            sys.exit(1)
        content = content.replace(original_model_block, replacement_model_block)
        print("Successfully patched the model loading block for ZeroGPU.")
    else:
        print("Modifying for pure CPU execution...")
        # CPU model loading: float32 weights mapped to the CPU.
        replacement_model_block = "\n".join([
            ' self.model = VibeVoiceForConditionalGenerationInference.from_pretrained(',
            ' self.model_path,',
            ' torch_dtype=torch.float32, # Use float32 for CPU',
            ' device_map="cpu",',
            ' )',
        ])
        if original_model_block not in content:
            print(_BLOCK_NOT_FOUND_MSG)
            sys.exit(1)
        content = content.replace(original_model_block, replacement_model_block)
        print("Script modified for CPU successfully.")

    # Write the dynamically modified content back to the demo file.
    demo_script_path.write_text(content)


def _launch_demo(demo_script_path: Path) -> None:
    """Launch the (patched) Gradio demo as a child process."""
    # Use sys.executable (not a bare "python") so the demo runs under the
    # same interpreter the package was just installed into.
    command = [
        sys.executable, str(demo_script_path),
        "--model_path", MODEL_ID,
        "--share",
    ]
    print(f"Launching Gradio demo with command: {' '.join(command)}")
    subprocess.run(command)


def main() -> None:
    """Run the end-to-end setup and launch sequence."""
    _clone_repo()
    _install_package()

    demo_script_path = Path("demo/gradio_demo.py")
    try:
        # SystemExit is a BaseException, so deliberate exits inside the
        # patcher propagate past this handler (same as the original script).
        _patch_demo_script(demo_script_path)
    except Exception as e:
        print(f"An error occurred while modifying the script: {e}")
        sys.exit(1)

    _launch_demo(demo_script_path)


if __name__ == "__main__":
    main()