seawolf2357 committed on
Commit
dfe713c
·
verified ·
1 Parent(s): 490cabd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -25
app.py CHANGED
@@ -18,6 +18,7 @@ def sh(cmd): subprocess.check_call(cmd, shell=True)
18
 
19
  flash_attention_installed = False
20
 
 
21
  try:
22
  print("Attempting to download and install FlashAttention wheel...")
23
  flash_attention_wheel = hf_hub_download(
@@ -38,6 +39,26 @@ except Exception as e:
38
  print(f"⚠️ Could not install FlashAttention: {e}")
39
  print("Continuing without FlashAttention...")
40
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
  import torch
42
  print(f"Torch version: {torch.__version__}")
43
  print(f"FlashAttention available: {flash_attention_installed}")
@@ -101,30 +122,26 @@ snapshot_download(
101
  # Initialize OviFusionEngine
102
  enable_cpu_offload = args.cpu_offload
103
  print(f"loading model...")
104
- DEFAULT_CONFIG['cpu_offload'] = enable_cpu_offload # always use cpu offload if image generation is enabled
105
- DEFAULT_CONFIG['mode'] = "t2v" # hardcoded since it is always cpu offloaded
106
  ovi_engine = OviFusionEngine()
107
  print("loaded model")
108
 
109
 
110
  def resize_for_model(image_path):
111
- # Open image
112
  img = Image.open(image_path)
113
  w, h = img.size
114
  aspect_ratio = w / h
115
 
116
- # Decide target size based on aspect ratio
117
- if aspect_ratio > 1.5: # wide image
118
  target_size = (992, 512)
119
- elif aspect_ratio < 0.66: # tall image
120
  target_size = (512, 992)
121
- else: # roughly square
122
  target_size = (512, 512)
123
 
124
- # Resize while preserving aspect ratio, then pad
125
  img.thumbnail(target_size, Image.Resampling.LANCZOS)
126
 
127
- # Create a new image with target size and paste centered
128
  new_img = Image.new("RGB", target_size, (0, 0, 0))
129
  new_img.paste(
130
  img,
@@ -153,11 +170,9 @@ def generate_scene(
153
  if not image:
154
  raise gr.Error("Please provide an image")
155
 
156
-
157
  if not text_prompt_processed:
158
  raise gr.Error("Please enter a prompt.")
159
 
160
-
161
  return generate_video(text_prompt,
162
  image,
163
  sample_steps,
@@ -188,7 +203,6 @@ def get_duration(
188
  progress,
189
  ):
190
  warmup = 20
191
-
192
  return int(sample_steps * 3 + warmup)
193
 
194
 
@@ -217,12 +231,10 @@ def generate_video(
217
  if session_id is None:
218
  session_id = uuid.uuid4().hex
219
 
220
-
221
  output_dir = os.path.join(os.environ["PROCESSED_RESULTS"], session_id)
222
  os.makedirs(output_dir, exist_ok=True)
223
  output_path = os.path.join(output_dir, f"generated_video.mp4")
224
 
225
-
226
  _, target_size = resize_for_model(image_path)
227
 
228
  video_frame_width = target_size[0]
@@ -252,14 +264,12 @@ def generate_video(
252
 
253
 
254
  def cleanup(request: gr.Request):
255
-
256
  sid = request.session_hash
257
  if sid:
258
  d1 = os.path.join(os.environ["PROCESSED_RESULTS"], sid)
259
  shutil.rmtree(d1, ignore_errors=True)
260
 
261
  def start_session(request: gr.Request):
262
-
263
  return request.session_hash
264
 
265
  css = """
@@ -297,7 +307,6 @@ with gr.Blocks(css=css, theme=theme) as demo:
297
  )
298
  with gr.Row():
299
  with gr.Column():
300
- # Image section
301
  image = gr.Image(type="filepath", label="Image", height=360)
302
 
303
  video_text_prompt = gr.Textbox(label="Scene Prompt",
@@ -341,37 +350,31 @@ with gr.Blocks(css=css, theme=theme) as demo:
341
  video_negative_prompt = gr.Textbox(label="Video Negative Prompt", placeholder="Things to avoid in video")
342
  audio_negative_prompt = gr.Textbox(label="Audio Negative Prompt", placeholder="Things to avoid in audio")
343
 
344
-
345
  with gr.Column():
346
  output_path = gr.Video(label="Generated Video", height=360)
347
 
348
  gr.Examples(
349
  examples=[
350
-
351
  [
352
  "The video opens with a close-up of a woman with vibrant reddish-orange, shoulder-length hair and heavy dark eye makeup. She is wearing a dark brown leather jacket over a grey hooded top. She looks intently to her right, her mouth slightly agape, and her expression is serious and focused. The background shows a room with light green walls and dark wooden cabinets on the left, and a green plant on the right. She speaks, her voice clear and direct, saying, <S>doing<E>. She then pauses briefly, her gaze unwavering, and continues, <S>And I need you to trust them.<E>. Her mouth remains slightly open, indicating she is either about to speak more or has just finished a sentence, with a look of intense sincerity.. <AUDCAP>Tense, dramatic background music, clear female voice.<ENDAUDCAP>",
353
  "example_prompts/pngs/8.png",
354
  50,
355
  ],
356
-
357
  [
358
  "A young woman with long, wavy blonde hair and light-colored eyes is shown in a medium shot against a blurred backdrop of lush green foliage. She wears a denim jacket over a striped top. Initially, her eyes are closed and her mouth is slightly open as she speaks, <S>Enjoy this moment<E>. Her eyes then slowly open, looking slightly upwards and to the right, as her expression shifts to one of thoughtful contemplation. She continues to speak, <S>No matter where it's taking<E>, her gaze then settling with a serious and focused look towards someone off-screen to her right.. <AUDCAP>Clear female voice, faint ambient outdoor sounds.<ENDAUDCAP>",
359
  "example_prompts/pngs/2.png",
360
  50,
361
  ],
362
-
363
  [
364
  "A bearded man wearing large dark sunglasses and a blue patterned cardigan sits in a studio, actively speaking into a large, suspended microphone. He has headphones on and gestures with his hands, displaying rings on his fingers. Behind him, a wall is covered with red, textured sound-dampening foam on the left, and a white banner on the right features the ""CHOICE FM"" logo and various social media handles like ""@ilovechoicefm"" with ""RALEIGH"" below it. The man intently addresses the microphone, articulating, <S>is talent. It's all about authenticity. You gotta be who you really are, especially if you're working<E>. He leans forward slightly as he speaks, maintaining a serious expression behind his sunglasses.. <AUDCAP>Clear male voice speaking into a microphone, a low background hum.<ENDAUDCAP>",
365
  "example_prompts/pngs/5.png",
366
  50,
367
  ],
368
-
369
  [
370
  "The video opens with a close-up on an older man with long, grey hair and a short, grey beard, wearing dark sunglasses. He is clad in a dark coat, possibly with fur trim, and black gloves. His face is angled slightly upwards and to the right, as he begins to speak, his mouth slightly open. In the immediate foreground, out of focus, is the dark-clad shoulder and the back of the head of another person. The man articulates, <S>labbra. Ti ci vorrebbe...<E> His expression remains contemplative, and he continues, seemingly completing his thought, <S>Un ego solare.<E> The background behind him is a textured, grey stone wall, suggesting an outdoor setting. The man's gaze remains fixed upwards, his expression thoughtful.. <AUDCAP>A clear, slightly low-pitched male voice speaking Italian. The overall soundscape is quiet, with no prominent background noises or music.<ENDAUDCAP>",
371
  "example_prompts/pngs/7.png",
372
  50,
373
  ],
374
-
375
  [
376
  "The scene is set outdoors with a blurry, bright green background, suggesting grass and a sunny environment. On the left, a woman with long, dark hair, wearing a red top and a necklace with a white pendant, faces towards the right. Her expression is serious and slightly perturbed as she speaks, with her lips slightly pursed. She says, <S>UFO, UFC thing.<E> On the right, the back of a man's head and his right ear are visible, indicating he is facing away from the camera, listening to the woman. He has short, dark hair. The woman continues speaking, her expression remaining serious, <S>And if you're not watching that, it's one of those ancient movies from an era that's<E> as the frame holds steady on the two figures.. <AUDCAP>Clear female speech, distant low-frequency hum.<ENDAUDCAP>",
377
  "example_prompts/pngs/9.png",
@@ -393,5 +396,4 @@ with gr.Blocks(css=css, theme=theme) as demo:
393
  if __name__ == "__main__":
394
  demo.unload(cleanup)
395
  demo.queue()
396
- demo.launch(ssr_mode=False, share=True)
397
-
 
18
 
19
  flash_attention_installed = False
20
 
21
+ # Attempt to install FlashAttention - continue even if it fails
22
  try:
23
  print("Attempting to download and install FlashAttention wheel...")
24
  flash_attention_wheel = hf_hub_download(
 
39
  print(f"⚠️ Could not install FlashAttention: {e}")
40
  print("Continuing without FlashAttention...")
41
 
42
+ # ===== CRITICAL FIX: patch the attention.py file =====
43
+ attention_file = "/home/user/app/ovi/modules/attention.py"
44
+ if os.path.exists(attention_file):
45
+ try:
46
+ with open(attention_file, 'r') as f:
47
+ content = f.read()
48
+
49
+ # Patch for the case where the FLASH_ATTN_3_AVAILABLE variable is not defined
50
+ if 'FLASH_ATTN_3_AVAILABLE' not in content.split('try:')[0]:
51
+ # ํŒŒ์ผ ์‹œ์ž‘ ๋ถ€๋ถ„์— ๋ณ€์ˆ˜ ์ดˆ๊ธฐํ™” ์ถ”๊ฐ€
52
+ patched_content = f"FLASH_ATTN_3_AVAILABLE = False\n\n{content}"
53
+
54
+ with open(attention_file, 'w') as f:
55
+ f.write(patched_content)
56
+
57
+ print("✓ Successfully patched attention.py")
58
+ except Exception as e:
59
+ print(f"⚠️ Could not patch attention.py: {e}")
60
+ # ===== END FIX =====
61
+
62
  import torch
63
  print(f"Torch version: {torch.__version__}")
64
  print(f"FlashAttention available: {flash_attention_installed}")
 
122
  # Initialize OviFusionEngine
123
  enable_cpu_offload = args.cpu_offload
124
  print(f"loading model...")
125
+ DEFAULT_CONFIG['cpu_offload'] = enable_cpu_offload
126
+ DEFAULT_CONFIG['mode'] = "t2v"
127
  ovi_engine = OviFusionEngine()
128
  print("loaded model")
129
 
130
 
131
  def resize_for_model(image_path):
 
132
  img = Image.open(image_path)
133
  w, h = img.size
134
  aspect_ratio = w / h
135
 
136
+ if aspect_ratio > 1.5:
 
137
  target_size = (992, 512)
138
+ elif aspect_ratio < 0.66:
139
  target_size = (512, 992)
140
+ else:
141
  target_size = (512, 512)
142
 
 
143
  img.thumbnail(target_size, Image.Resampling.LANCZOS)
144
 
 
145
  new_img = Image.new("RGB", target_size, (0, 0, 0))
146
  new_img.paste(
147
  img,
 
170
  if not image:
171
  raise gr.Error("Please provide an image")
172
 
 
173
  if not text_prompt_processed:
174
  raise gr.Error("Please enter a prompt.")
175
 
 
176
  return generate_video(text_prompt,
177
  image,
178
  sample_steps,
 
203
  progress,
204
  ):
205
  warmup = 20
 
206
  return int(sample_steps * 3 + warmup)
207
 
208
 
 
231
  if session_id is None:
232
  session_id = uuid.uuid4().hex
233
 
 
234
  output_dir = os.path.join(os.environ["PROCESSED_RESULTS"], session_id)
235
  os.makedirs(output_dir, exist_ok=True)
236
  output_path = os.path.join(output_dir, f"generated_video.mp4")
237
 
 
238
  _, target_size = resize_for_model(image_path)
239
 
240
  video_frame_width = target_size[0]
 
264
 
265
 
266
  def cleanup(request: gr.Request):
 
267
  sid = request.session_hash
268
  if sid:
269
  d1 = os.path.join(os.environ["PROCESSED_RESULTS"], sid)
270
  shutil.rmtree(d1, ignore_errors=True)
271
 
272
  def start_session(request: gr.Request):
 
273
  return request.session_hash
274
 
275
  css = """
 
307
  )
308
  with gr.Row():
309
  with gr.Column():
 
310
  image = gr.Image(type="filepath", label="Image", height=360)
311
 
312
  video_text_prompt = gr.Textbox(label="Scene Prompt",
 
350
  video_negative_prompt = gr.Textbox(label="Video Negative Prompt", placeholder="Things to avoid in video")
351
  audio_negative_prompt = gr.Textbox(label="Audio Negative Prompt", placeholder="Things to avoid in audio")
352
 
 
353
  with gr.Column():
354
  output_path = gr.Video(label="Generated Video", height=360)
355
 
356
  gr.Examples(
357
  examples=[
 
358
  [
359
  "The video opens with a close-up of a woman with vibrant reddish-orange, shoulder-length hair and heavy dark eye makeup. She is wearing a dark brown leather jacket over a grey hooded top. She looks intently to her right, her mouth slightly agape, and her expression is serious and focused. The background shows a room with light green walls and dark wooden cabinets on the left, and a green plant on the right. She speaks, her voice clear and direct, saying, <S>doing<E>. She then pauses briefly, her gaze unwavering, and continues, <S>And I need you to trust them.<E>. Her mouth remains slightly open, indicating she is either about to speak more or has just finished a sentence, with a look of intense sincerity.. <AUDCAP>Tense, dramatic background music, clear female voice.<ENDAUDCAP>",
360
  "example_prompts/pngs/8.png",
361
  50,
362
  ],
 
363
  [
364
  "A young woman with long, wavy blonde hair and light-colored eyes is shown in a medium shot against a blurred backdrop of lush green foliage. She wears a denim jacket over a striped top. Initially, her eyes are closed and her mouth is slightly open as she speaks, <S>Enjoy this moment<E>. Her eyes then slowly open, looking slightly upwards and to the right, as her expression shifts to one of thoughtful contemplation. She continues to speak, <S>No matter where it's taking<E>, her gaze then settling with a serious and focused look towards someone off-screen to her right.. <AUDCAP>Clear female voice, faint ambient outdoor sounds.<ENDAUDCAP>",
365
  "example_prompts/pngs/2.png",
366
  50,
367
  ],
 
368
  [
369
  "A bearded man wearing large dark sunglasses and a blue patterned cardigan sits in a studio, actively speaking into a large, suspended microphone. He has headphones on and gestures with his hands, displaying rings on his fingers. Behind him, a wall is covered with red, textured sound-dampening foam on the left, and a white banner on the right features the ""CHOICE FM"" logo and various social media handles like ""@ilovechoicefm"" with ""RALEIGH"" below it. The man intently addresses the microphone, articulating, <S>is talent. It's all about authenticity. You gotta be who you really are, especially if you're working<E>. He leans forward slightly as he speaks, maintaining a serious expression behind his sunglasses.. <AUDCAP>Clear male voice speaking into a microphone, a low background hum.<ENDAUDCAP>",
370
  "example_prompts/pngs/5.png",
371
  50,
372
  ],
 
373
  [
374
  "The video opens with a close-up on an older man with long, grey hair and a short, grey beard, wearing dark sunglasses. He is clad in a dark coat, possibly with fur trim, and black gloves. His face is angled slightly upwards and to the right, as he begins to speak, his mouth slightly open. In the immediate foreground, out of focus, is the dark-clad shoulder and the back of the head of another person. The man articulates, <S>labbra. Ti ci vorrebbe...<E> His expression remains contemplative, and he continues, seemingly completing his thought, <S>Un ego solare.<E> The background behind him is a textured, grey stone wall, suggesting an outdoor setting. The man's gaze remains fixed upwards, his expression thoughtful.. <AUDCAP>A clear, slightly low-pitched male voice speaking Italian. The overall soundscape is quiet, with no prominent background noises or music.<ENDAUDCAP>",
375
  "example_prompts/pngs/7.png",
376
  50,
377
  ],
 
378
  [
379
  "The scene is set outdoors with a blurry, bright green background, suggesting grass and a sunny environment. On the left, a woman with long, dark hair, wearing a red top and a necklace with a white pendant, faces towards the right. Her expression is serious and slightly perturbed as she speaks, with her lips slightly pursed. She says, <S>UFO, UFC thing.<E> On the right, the back of a man's head and his right ear are visible, indicating he is facing away from the camera, listening to the woman. He has short, dark hair. The woman continues speaking, her expression remaining serious, <S>And if you're not watching that, it's one of those ancient movies from an era that's<E> as the frame holds steady on the two figures.. <AUDCAP>Clear female speech, distant low-frequency hum.<ENDAUDCAP>",
380
  "example_prompts/pngs/9.png",
 
396
  if __name__ == "__main__":
397
  demo.unload(cleanup)
398
  demo.queue()
399
+ demo.launch(ssr_mode=False, share=True)