Commit b7dad17
1 Parent(s): ba08945
bfsdbv

app.py CHANGED
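This commit relocates the ColPali setup block in app.py: the block (its comment renamed from `# Load model` to `# Load colpali model`) is removed from below `model_inference` (old lines 94-104) and re-inserted at module scope above it (new lines 36-47), so the retriever, its processor, and the `mock_image` placeholder are defined before the generation function.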
@@ -33,6 +33,18 @@ id_model = Idefics3ForConditionalGeneration.from_pretrained("HuggingFaceM4/Idefi
 BAD_WORDS_IDS = id_processor.tokenizer(["<image>", "<fake_token_around_image>"], add_special_tokens=False).input_ids
 EOS_WORDS_IDS = [id_processor.tokenizer.eos_token_id]
 
+# Load colpali model
+model_name = "vidore/colpali-v1.2"
+token = os.environ.get("HF_TOKEN")
+model = ColPali.from_pretrained(
+    "vidore/colpaligemma-3b-pt-448-base", torch_dtype=torch.bfloat16, device_map="cuda", token = token).eval()
+
+model.load_adapter(model_name)
+model = model.eval()
+processor = AutoProcessor.from_pretrained(model_name, token = token)
+
+mock_image = Image.new("RGB", (448, 448), (255, 255, 255))
+
 @spaces.GPU
 def model_inference(
     images, text, assistant_prefix= None, decoding_strategy = "Greedy", temperature= 0.4, max_new_tokens=512,
@@ -91,17 +103,6 @@ def model_inference(
     generated_texts = id_processor.batch_decode(generated_ids[:, generation_args["input_ids"].size(1):], skip_special_tokens=True)
     return generated_texts[0]
 
-# Load model
-model_name = "vidore/colpali-v1.2"
-token = os.environ.get("HF_TOKEN")
-model = ColPali.from_pretrained(
-    "vidore/colpaligemma-3b-pt-448-base", torch_dtype=torch.bfloat16, device_map="cuda", token = token).eval()
-
-model.load_adapter(model_name)
-model = model.eval()
-processor = AutoProcessor.from_pretrained(model_name, token = token)
-
-mock_image = Image.new("RGB", (448, 448), (255, 255, 255))
 
 
 @spaces.GPU
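For orientation, ColPali is a ColBERT-style late-interaction retriever: a query and a page image are each embedded as a matrix of token-level vectors, and relevance is the sum over query tokens of each token's best match among the document tokens (MaxSim). The sketch below illustrates that scoring step only; it is not code from this Space, and the function name and tensor shapes are assumptions.

import torch

def maxsim_score(query_emb: torch.Tensor, doc_emb: torch.Tensor) -> torch.Tensor:
    # query_emb: (num_query_tokens, dim) multi-vector embedding of one query.
    # doc_emb:   (num_doc_tokens, dim)  multi-vector embedding of one page image.
    sim = torch.einsum("qd,kd->qk", query_emb.float(), doc_emb.float())
    # MaxSim: keep each query token's best-matching document token, sum over the query.
    return sim.max(dim=1).values.sum()

The white 448×448 `mock_image` created in the diff is, in the ColPali v1.2 tooling, typically passed to the PaliGemma-based processor as a placeholder image when embedding text-only queries; that usage is an assumption from the model's usual pipeline, not something this commit shows.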