prithivMLmods committed on
Commit
5a00865
·
verified ·
1 Parent(s): b604e8c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -13
app.py CHANGED
@@ -32,7 +32,7 @@ enhancer_long = pipeline("summarization", model="prithivMLmods/t5-Flan-Prompt-En
32
  MAX_SEED = np.iinfo(np.int32).max
33
  MAX_IMAGE_SIZE = 2048
34
 
35
- # Qwen2VL caption function – updated with no_grad and autocast contexts, and explicit device moves
36
  @spaces.GPU
37
  def qwen_caption(image):
38
  # Convert image to PIL if needed
@@ -48,7 +48,7 @@ def qwen_caption(image):
48
  ],
49
  }
50
  ]
51
-
52
  text = qwen_processor.apply_chat_template(
53
  messages, tokenize=False, add_generation_prompt=True
54
  )
@@ -59,17 +59,11 @@ def qwen_caption(image):
59
  videos=video_inputs,
60
  padding=True,
61
  return_tensors="pt",
62
- )
63
- # Explicitly move each tensor to device
64
- inputs = {k: v.to(device) for k, v in inputs.items()}
65
-
66
- # Wrap generation in no_grad and autocast contexts to prevent extra memory usage and potential caching issues
67
- with torch.no_grad():
68
- with torch.cuda.amp.autocast(device_type="cuda", dtype=torch.float16):
69
- generated_ids = qwen_model.generate(**inputs, max_new_tokens=1024)
70
-
71
  generated_ids_trimmed = [
72
- out_ids[len(in_ids) :] for in_ids, out_ids in zip(inputs["input_ids"], generated_ids)
73
  ]
74
  output_text = qwen_processor.batch_decode(
75
  generated_ids_trimmed,
@@ -124,7 +118,7 @@ def process_workflow(image, text_prompt, use_enhancer, seed, randomize_seed, wid
124
 
125
  custom_css = """
126
  .input-group, .output-group {
127
- /* You can add styling here if needed */
128
  }
129
  .submit-btn {
130
  background: linear-gradient(90deg, #4B79A1 0%, #283E51 100%) !important;
 
32
  MAX_SEED = np.iinfo(np.int32).max
33
  MAX_IMAGE_SIZE = 2048
34
 
35
+ # Qwen2VL caption function – updated to request plain text caption instead of JSON
36
  @spaces.GPU
37
  def qwen_caption(image):
38
  # Convert image to PIL if needed
 
48
  ],
49
  }
50
  ]
51
+
52
  text = qwen_processor.apply_chat_template(
53
  messages, tokenize=False, add_generation_prompt=True
54
  )
 
59
  videos=video_inputs,
60
  padding=True,
61
  return_tensors="pt",
62
+ ).to(device)
63
+
64
+ generated_ids = qwen_model.generate(**inputs, max_new_tokens=1024)
 
 
 
 
 
 
65
  generated_ids_trimmed = [
66
+ out_ids[len(in_ids) :] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
67
  ]
68
  output_text = qwen_processor.batch_decode(
69
  generated_ids_trimmed,
 
118
 
119
  custom_css = """
120
  .input-group, .output-group {
121
+
122
  }
123
  .submit-btn {
124
  background: linear-gradient(90deg, #4B79A1 0%, #283E51 100%) !important;