prithivMLmods committed
Commit b604e8c · verified · 1 Parent(s): 3ed9f62

Update app.py

Files changed (1):
  1. app.py +13 -7
app.py CHANGED
@@ -32,7 +32,7 @@ enhancer_long = pipeline("summarization", model="prithivMLmods/t5-Flan-Prompt-En
 MAX_SEED = np.iinfo(np.int32).max
 MAX_IMAGE_SIZE = 2048
 
-# Qwen2VL caption function – updated to request plain text caption instead of JSON
+# Qwen2VL caption function – updated with no_grad and autocast contexts, and explicit device moves
 @spaces.GPU
 def qwen_caption(image):
     # Convert image to PIL if needed
@@ -48,7 +48,7 @@ def qwen_caption(image):
             ],
         }
     ]
-
+
     text = qwen_processor.apply_chat_template(
         messages, tokenize=False, add_generation_prompt=True
     )
@@ -59,11 +59,17 @@ def qwen_caption(image):
         videos=video_inputs,
         padding=True,
         return_tensors="pt",
-    ).to(device)
-
-    generated_ids = qwen_model.generate(**inputs, max_new_tokens=1024)
+    )
+    # Explicitly move each tensor to the device (this turns the BatchFeature into a plain dict)
+    inputs = {k: v.to(device) for k, v in inputs.items()}
+
+    # Wrap generation in no_grad and autocast contexts to prevent extra memory usage and potential caching issues
+    with torch.no_grad():
+        with torch.amp.autocast(device_type="cuda", dtype=torch.float16):
+            generated_ids = qwen_model.generate(**inputs, max_new_tokens=1024)
+
     generated_ids_trimmed = [
-        out_ids[len(in_ids) :] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
+        out_ids[len(in_ids) :] for in_ids, out_ids in zip(inputs["input_ids"], generated_ids)
     ]
     output_text = qwen_processor.batch_decode(
         generated_ids_trimmed,
@@ -118,7 +124,7 @@ def process_workflow(image, text_prompt, use_enhancer, seed, randomize_seed, wid
 
 custom_css = """
 .input-group, .output-group {
-
+    /* You can add styling here if needed */
 }
 .submit-btn {
     background: linear-gradient(90deg, #4B79A1 0%, #283E51 100%) !important;
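
For reference, below is a minimal standalone sketch of the generation pattern this commit introduces. It is not the app's exact code: the checkpoint name, the prompt string, and the caption helper are illustrative assumptions; the device handling, the fp16 autocast wrapping, the prompt-token trimming, and the 1024-token budget come from the diff above.

# A minimal sketch, assuming a Qwen2-VL checkpoint served through
# transformers; "Qwen/Qwen2-VL-2B-Instruct" and the prompt text are
# illustrative, not taken from this repo.
import torch
from transformers import AutoProcessor, Qwen2VLForConditionalGeneration
from qwen_vl_utils import process_vision_info

device = "cuda"  # the Space runs under @spaces.GPU, so CUDA is assumed
qwen_model = Qwen2VLForConditionalGeneration.from_pretrained(
    "Qwen/Qwen2-VL-2B-Instruct", torch_dtype=torch.float16
).to(device)
qwen_processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-2B-Instruct")

def caption(image) -> str:
    messages = [
        {
            "role": "user",
            "content": [
                {"type": "image", "image": image},
                {"type": "text", "text": "Describe this image in plain text."},
            ],
        }
    ]
    text = qwen_processor.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    image_inputs, video_inputs = process_vision_info(messages)
    inputs = qwen_processor(
        text=[text],
        images=image_inputs,
        videos=video_inputs,
        padding=True,
        return_tensors="pt",
    )
    # The processor returns a BatchFeature; this comprehension yields a plain
    # dict, which is why the diff switches from inputs.input_ids to
    # inputs["input_ids"] afterwards.
    inputs = {k: v.to(device) for k, v in inputs.items()}

    # no_grad skips autograd bookkeeping during inference; autocast runs the
    # forward pass in fp16. Note that device_type is an argument of
    # torch.amp.autocast; the older torch.cuda.amp.autocast does not accept it.
    with torch.no_grad():
        with torch.amp.autocast(device_type="cuda", dtype=torch.float16):
            generated_ids = qwen_model.generate(**inputs, max_new_tokens=1024)

    # Slice off the prompt tokens so only newly generated text is decoded.
    trimmed = [
        out_ids[len(in_ids) :]
        for in_ids, out_ids in zip(inputs["input_ids"], generated_ids)
    ]
    return qwen_processor.batch_decode(
        trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
    )[0]

The explicit per-tensor .to(device) is functionally equivalent to calling .to(device) on the BatchFeature itself, as the old code did; the commit's version just makes the device placement, and the resulting plain-dict access pattern, explicit.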