Spaces: Running on Zero
Update app.py
Browse files
app.py
CHANGED
@@ -32,7 +32,7 @@ enhancer_long = pipeline("summarization", model="prithivMLmods/t5-Flan-Prompt-En
|
|
32 |
MAX_SEED = np.iinfo(np.int32).max
|
33 |
MAX_IMAGE_SIZE = 2048
|
34 |
|
35 |
-
# Qwen2VL caption function – updated
|
36 |
@spaces.GPU
|
37 |
def qwen_caption(image):
|
38 |
# Convert image to PIL if needed
|
@@ -48,7 +48,7 @@ def qwen_caption(image):
|
|
48 |
],
|
49 |
}
|
50 |
]
|
51 |
-
|
52 |
text = qwen_processor.apply_chat_template(
|
53 |
messages, tokenize=False, add_generation_prompt=True
|
54 |
)
|
@@ -59,17 +59,11 @@ def qwen_caption(image):
|
|
59 |
videos=video_inputs,
|
60 |
padding=True,
|
61 |
return_tensors="pt",
|
62 |
-
)
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
# Wrap generation in no_grad and autocast contexts to prevent extra memory usage and potential caching issues
|
67 |
-
with torch.no_grad():
|
68 |
-
with torch.cuda.amp.autocast(device_type="cuda", dtype=torch.float16):
|
69 |
-
generated_ids = qwen_model.generate(**inputs, max_new_tokens=1024)
|
70 |
-
|
71 |
generated_ids_trimmed = [
|
72 |
-
out_ids[len(in_ids) :] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
|
73 |
]
|
74 |
output_text = qwen_processor.batch_decode(
|
75 |
generated_ids_trimmed,
|
@@ -124,7 +118,7 @@ def process_workflow(image, text_prompt, use_enhancer, seed, randomize_seed, wid
|
|
124 |
|
125 |
custom_css = """
|
126 |
.input-group, .output-group {
|
127 |
-
|
128 |
}
|
129 |
.submit-btn {
|
130 |
background: linear-gradient(90deg, #4B79A1 0%, #283E51 100%) !important;
|
|
|
32 |
MAX_SEED = np.iinfo(np.int32).max
|
33 |
MAX_IMAGE_SIZE = 2048
|
34 |
|
35 |
+
# Qwen2VL caption function – updated to request plain text caption instead of JSON
|
36 |
@spaces.GPU
|
37 |
def qwen_caption(image):
|
38 |
# Convert image to PIL if needed
|
|
|
48 |
],
|
49 |
}
|
50 |
]
|
51 |
+
|
52 |
text = qwen_processor.apply_chat_template(
|
53 |
messages, tokenize=False, add_generation_prompt=True
|
54 |
)
|
|
|
59 |
videos=video_inputs,
|
60 |
padding=True,
|
61 |
return_tensors="pt",
|
62 |
+
).to(device)
|
63 |
+
|
64 |
+
generated_ids = qwen_model.generate(**inputs, max_new_tokens=1024)
|
|
|
|
|
|
|
|
|
|
|
|
|
65 |
generated_ids_trimmed = [
|
66 |
+
out_ids[len(in_ids) :] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
|
67 |
]
|
68 |
output_text = qwen_processor.batch_decode(
|
69 |
generated_ids_trimmed,
|
|
|
118 |
|
119 |
custom_css = """
|
120 |
.input-group, .output-group {
|
121 |
+
|
122 |
}
|
123 |
.submit-btn {
|
124 |
background: linear-gradient(90deg, #4B79A1 0%, #283E51 100%) !important;
|