broadfield-dev committed on
Commit
c1b8cf8
·
verified ·
1 Parent(s): b4b995a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +39 -46
app.py CHANGED
@@ -4,8 +4,8 @@ import sys
4
  from pathlib import Path
5
 
6
  # --- 0. Hardcoded Toggle for Execution Environment ---
7
- # Set this to True to use Hugging Face ZeroGPU
8
- # Set this to False to use a pure CPU environment
9
  USE_ZEROGPU = True
10
 
11
  # --- 1. Clone the VibeVoice Repository ---
@@ -44,12 +44,12 @@ except subprocess.CalledProcessError as e:
44
  print(f"Error installing package: {e.stderr}")
45
  sys.exit(1)
46
 
47
- # Install 'spaces' if using ZeroGPU
48
  if USE_ZEROGPU:
49
  print("Installing the 'spaces' library for ZeroGPU...")
50
  try:
51
  subprocess.run(
52
- [sys.executable, "-m", "pip", "install", "huggingface-hub", "gradio", "spaces"],
53
  check=True,
54
  capture_output=True,
55
  text=True
@@ -59,74 +59,68 @@ if USE_ZEROGPU:
59
  print(f"Error installing 'spaces' library: {e.stderr}")
60
  sys.exit(1)
61
 
62
-
63
  # --- 3. Modify the demo script based on the toggle ---
64
  demo_script_path = Path("demo/gradio_demo.py")
65
  print(f"Reading {demo_script_path}...")
66
 
67
  try:
68
  file_content = demo_script_path.read_text()
69
-
70
- if USE_ZEROGPU:
71
- print("Optimizing for ZeroGPU execution...")
72
 
73
- # Ensure the original GPU block is present
74
- original_block = """ self.model = VibeVoiceForConditionalGenerationInference.from_pretrained(
 
75
  self.model_path,
76
  torch_dtype=torch.bfloat16,
77
  device_map='cuda',
78
  attn_implementation="flash_attention_2",
79
  )"""
80
 
81
- if original_block in file_content:
82
- # Add 'import spaces' at the beginning of the file
83
- modified_content = "import spaces\n" + file_content
84
-
85
- # Decorate the model loading and generation functions with @spaces.GPU
86
- # This is a robust way to ensure both setup and inference get GPU access
87
- modified_content = modified_content.replace(
88
- "class VibeVoiceGradioInterface:",
89
- "@spaces.GPU\nclass VibeVoiceGradioInterface:"
90
- )
91
- print("Script modified for ZeroGPU successfully.")
92
-
93
- # Write the modified content back to the file
94
- demo_script_path.write_text(modified_content)
95
- else:
96
- print("Warning: Original GPU-specific model loading block not found. The script might have been updated. Proceeding with potential ZeroGPU compatibility.")
97
-
98
- else:
99
- print("Modifying for CPU execution...")
100
- # Define the original GPU-specific model loading block
101
- original_block = """ self.model = VibeVoiceForConditionalGenerationInference.from_pretrained(
102
  self.model_path,
103
  torch_dtype=torch.bfloat16,
104
  device_map='cuda',
105
- attn_implementation="flash_attention_2",
106
  )"""
107
-
108
- # Define the new CPU-compatible block
109
- replacement_block = """ self.model = VibeVoiceForConditionalGenerationInference.from_pretrained(
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
110
  self.model_path,
111
  torch_dtype=torch.float32, # Use float32 for CPU
112
  device_map="cpu",
113
  )"""
 
 
 
 
114
 
115
- # Replace the entire block
116
- if original_block in file_content:
117
- modified_content = file_content.replace(original_block, replacement_block)
118
-
119
- # Write the modified content back to the file
120
- demo_script_path.write_text(modified_content)
121
- print("Script modified for CPU successfully.")
122
- else:
123
- print("Warning: GPU-specific model loading block not found. The script might have been updated. Proceeding without modification.")
124
 
125
  except Exception as e:
126
  print(f"An error occurred while modifying the script: {e}")
127
  sys.exit(1)
128
 
129
-
130
  # --- 4. Launch the Gradio Demo ---
131
  model_id = "microsoft/VibeVoice-1.5B"
132
 
@@ -140,5 +134,4 @@ command = [
140
  ]
141
 
142
  print(f"Launching Gradio demo with command: {' '.join(command)}")
143
- # This command will start the Gradio server
144
  subprocess.run(command)
 
4
  from pathlib import Path
5
 
6
  # --- 0. Hardcoded Toggle for Execution Environment ---
7
+ # Set this to True to use Hugging Face ZeroGPU (recommended)
8
+ # Set this to False to use the slower, pure CPU environment
9
  USE_ZEROGPU = True
10
 
11
  # --- 1. Clone the VibeVoice Repository ---
 
44
  print(f"Error installing package: {e.stderr}")
45
  sys.exit(1)
46
 
47
+ # Install 'spaces' if using ZeroGPU, as it's required for the decorator
48
  if USE_ZEROGPU:
49
  print("Installing the 'spaces' library for ZeroGPU...")
50
  try:
51
  subprocess.run(
52
+ [sys.executable, "-m", "pip", "install", "spaces"],
53
  check=True,
54
  capture_output=True,
55
  text=True
 
59
  print(f"Error installing 'spaces' library: {e.stderr}")
60
  sys.exit(1)
61
 
 
62
  # --- 3. Modify the demo script based on the toggle ---
63
  demo_script_path = Path("demo/gradio_demo.py")
64
  print(f"Reading {demo_script_path}...")
65
 
66
  try:
67
  file_content = demo_script_path.read_text()
 
 
 
68
 
69
+ # Define the original GPU-specific model loading block we want to replace
70
+ # This block is problematic because it hardcodes FlashAttention
71
+ original_block = """ self.model = VibeVoiceForConditionalGenerationInference.from_pretrained(
72
  self.model_path,
73
  torch_dtype=torch.bfloat16,
74
  device_map='cuda',
75
  attn_implementation="flash_attention_2",
76
  )"""
77
 
78
+ if USE_ZEROGPU:
79
+ print("Optimizing for ZeroGPU execution...")
80
+
81
+ # New block for ZeroGPU: We remove the problematic flash_attention line.
82
+ # Transformers will automatically use the best available attention mechanism.
83
+ replacement_block_gpu = """ self.model = VibeVoiceForConditionalGenerationInference.from_pretrained(
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
84
  self.model_path,
85
  torch_dtype=torch.bfloat16,
86
  device_map='cuda',
 
87
  )"""
88
+
89
+ # Add 'import spaces' at the beginning of the file
90
+ modified_content = "import spaces\n" + file_content
91
+
92
+ # Decorate the main class with @spaces.GPU to request a GPU
93
+ modified_content = modified_content.replace(
94
+ "class VibeVoiceGradioInterface:",
95
+ "@spaces.GPU(duration=120)\nclass VibeVoiceGradioInterface:"
96
+ )
97
+
98
+ # Replace the model loading block
99
+ modified_content = modified_content.replace(original_block, replacement_block_gpu)
100
+ print("Script modified for ZeroGPU successfully.")
101
+
102
+ else: # Pure CPU execution
103
+ print("Modifying for pure CPU execution...")
104
+
105
+ # New block for CPU: Use float32 and map directly to CPU.
106
+ # FlashAttention is not compatible with CPU.
107
+ replacement_block_cpu = """ self.model = VibeVoiceForConditionalGenerationInference.from_pretrained(
108
  self.model_path,
109
  torch_dtype=torch.float32, # Use float32 for CPU
110
  device_map="cpu",
111
  )"""
112
+
113
+ # Replace the model loading block
114
+ modified_content = file_content.replace(original_block, replacement_block_cpu)
115
+ print("Script modified for CPU successfully.")
116
 
117
+ # Write the modified content back to the file
118
+ demo_script_path.write_text(modified_content)
 
 
 
 
 
 
 
119
 
120
  except Exception as e:
121
  print(f"An error occurred while modifying the script: {e}")
122
  sys.exit(1)
123
 
 
124
  # --- 4. Launch the Gradio Demo ---
125
  model_id = "microsoft/VibeVoice-1.5B"
126
 
 
134
  ]
135
 
136
  print(f"Launching Gradio demo with command: {' '.join(command)}")
 
137
  subprocess.run(command)