broadfield-dev committed on
Commit
fb12e2c
·
verified ·
1 Parent(s): b2be989

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +43 -24
app.py CHANGED
@@ -51,34 +51,50 @@ print(f"Reading {demo_script_path} to apply environment-specific modifications..
51
  try:
52
  file_content = demo_script_path.read_text()
53
 
54
- # Define the original model loading block that we need to replace.
55
- # This block is problematic because it hardcodes FlashAttention.
56
- original_block = """ self.model = VibeVoiceForConditionalGenerationInference.from_pretrained(
57
- self.model_path,
58
- torch_dtype=torch.bfloat16,
59
- device_map='cuda',
60
- attn_implementation="flash_attention_2",
61
- )"""
 
 
 
 
 
 
 
 
 
 
62
 
63
  if USE_ZEROGPU:
64
  print("Optimizing for ZeroGPU execution...")
65
 
66
  # New block for ZeroGPU: We remove the problematic `attn_implementation` line.
67
- # `transformers` will automatically use the best available attention mechanism.
68
- replacement_block_gpu = """ self.model = VibeVoiceForConditionalGenerationInference.from_pretrained(
69
- self.model_path,
70
- torch_dtype=torch.bfloat16,
71
- device_map='cuda',
72
- )"""
 
 
73
 
74
  # Add 'import spaces' at the beginning of the file for the @spaces.GPU decorator
75
- modified_content = "import spaces\n" + file_content
 
 
 
76
 
77
  # Decorate the main interface class to request a GPU from the Spaces infrastructure
78
- modified_content = modified_content.replace(
79
- "class VibeVoiceGradioInterface:",
80
- "@spaces.GPU(duration=120)\nclass VibeVoiceGradioInterface:"
81
- )
 
82
 
83
  # Replace the model loading block
84
  modified_content = modified_content.replace(original_block, replacement_block_gpu)
@@ -88,11 +104,14 @@ try:
88
  print("Modifying for pure CPU execution...")
89
 
90
  # New block for CPU: Use float32 and map directly to the CPU.
91
- replacement_block_cpu = """ self.model = VibeVoiceForConditionalGenerationInference.from_pretrained(
92
- self.model_path,
93
- torch_dtype=torch.float32, # Use float32 for CPU
94
- device_map="cpu",
95
- )"""
 
 
 
96
 
97
  # Replace the original model loading block with the CPU version
98
  modified_content = file_content.replace(original_block, replacement_block_cpu)
 
51
  try:
52
  file_content = demo_script_path.read_text()
53
 
54
+ # Define the original model loading block using a list of lines for robustness.
55
+ # This avoids issues with indentation in multi-line string literals.
56
+ original_lines = [
57
+ ' self.model = VibeVoiceForConditionalGenerationInference.from_pretrained(',
58
+ ' self.model_path,',
59
+ ' torch_dtype=torch.bfloat16,',
60
+ " device_map='cuda',",
61
+ ' attn_implementation="flash_attention_2",',
62
+ ' )'
63
+ ]
64
+ original_block = "\n".join(original_lines)
65
+
66
+ # Check if the block to be patched exists in the file
67
+ if original_block not in file_content:
68
+ print("\033[91mError: The original code block to be patched was not found.\033[0m")
69
+ print("The demo script may have changed, or there might be a whitespace mismatch.")
70
+ print("Please verify the contents of demo/gradio_demo.py.")
71
+ sys.exit(1)
72
 
73
  if USE_ZEROGPU:
74
  print("Optimizing for ZeroGPU execution...")
75
 
76
  # New block for ZeroGPU: We remove the problematic `attn_implementation` line.
77
+ replacement_lines_gpu = [
78
+ ' self.model = VibeVoiceForConditionalGenerationInference.from_pretrained(',
79
+ ' self.model_path,',
80
+ ' torch_dtype=torch.bfloat16,',
81
+ " device_map='cuda',",
82
+ ' )'
83
+ ]
84
+ replacement_block_gpu = "\n".join(replacement_lines_gpu)
85
 
86
  # Add 'import spaces' at the beginning of the file for the @spaces.GPU decorator
87
+ if "import spaces" not in file_content:
88
+ modified_content = "import spaces\n" + file_content
89
+ else:
90
+ modified_content = file_content
91
 
92
  # Decorate the main interface class to request a GPU from the Spaces infrastructure
93
+ if "@spaces.GPU" not in modified_content:
94
+ modified_content = modified_content.replace(
95
+ "class VibeVoiceDemo:",
96
+ "@spaces.GPU(duration=120)\nclass VibeVoiceDemo:"
97
+ )
98
 
99
  # Replace the model loading block
100
  modified_content = modified_content.replace(original_block, replacement_block_gpu)
 
104
  print("Modifying for pure CPU execution...")
105
 
106
  # New block for CPU: Use float32 and map directly to the CPU.
107
+ replacement_lines_cpu = [
108
+ ' self.model = VibeVoiceForConditionalGenerationInference.from_pretrained(',
109
+ ' self.model_path,',
110
+ ' torch_dtype=torch.float32, # Use float32 for CPU',
111
+ ' device_map="cpu",',
112
+ ' )'
113
+ ]
114
+ replacement_block_cpu = "\n".join(replacement_lines_cpu)
115
 
116
  # Replace the original model loading block with the CPU version
117
  modified_content = file_content.replace(original_block, replacement_block_cpu)