Spaces:
Sleeping
Sleeping
Commit
·
9f28ec7
1
Parent(s):
c34d360
fsdv
Browse files
app.py
CHANGED
|
@@ -22,34 +22,25 @@ import subprocess
|
|
| 22 |
subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
|
| 23 |
|
| 24 |
|
| 25 |
-
## Load idefics
|
| 26 |
-
id_processor = AutoProcessor.from_pretrained("HuggingFaceM4/Idefics3-8B-Llama3")
|
| 27 |
|
| 28 |
-
id_model = Idefics3ForConditionalGeneration.from_pretrained("HuggingFaceM4/Idefics3-8B-Llama3",
|
| 29 |
-
torch_dtype=torch.bfloat16,
|
| 30 |
-
#_attn_implementation="flash_attention_2"
|
| 31 |
-
).to("cuda")
|
| 32 |
|
| 33 |
-
BAD_WORDS_IDS = id_processor.tokenizer(["<image>", "<fake_token_around_image>"], add_special_tokens=False).input_ids
|
| 34 |
-
EOS_WORDS_IDS = [id_processor.tokenizer.eos_token_id]
|
| 35 |
|
| 36 |
-
# Load colpali model
|
| 37 |
-
model_name = "vidore/colpali-v1.2"
|
| 38 |
-
token = os.environ.get("HF_TOKEN")
|
| 39 |
-
model = ColPali.from_pretrained(
|
| 40 |
-
"vidore/colpaligemma-3b-pt-448-base", torch_dtype=torch.bfloat16, device_map="cuda", token = token).eval()
|
| 41 |
-
|
| 42 |
-
model.load_adapter(model_name)
|
| 43 |
-
model = model.eval()
|
| 44 |
-
processor = AutoProcessor.from_pretrained(model_name, token = token)
|
| 45 |
-
|
| 46 |
-
mock_image = Image.new("RGB", (448, 448), (255, 255, 255))
|
| 47 |
|
| 48 |
@spaces.GPU
|
| 49 |
def model_inference(
|
| 50 |
images, text, assistant_prefix= None, decoding_strategy = "Greedy", temperature= 0.4, max_new_tokens=512,
|
| 51 |
repetition_penalty=1.2, top_p=0.8
|
| 52 |
):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 53 |
print(type(images))
|
| 54 |
images = images[0]
|
| 55 |
print(type(images))
|
|
@@ -111,6 +102,18 @@ def model_inference(
|
|
| 111 |
@spaces.GPU
|
| 112 |
def search(query: str, ds, images, k):
|
| 113 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 114 |
device = "cuda:0" if torch.cuda.is_available() else "cpu"
|
| 115 |
if device != model.device:
|
| 116 |
model.to(device)
|
|
|
|
| 22 |
subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
|
| 23 |
|
| 24 |
|
|
|
|
|
|
|
| 25 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
|
|
|
|
|
|
|
| 27 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
|
| 29 |
@spaces.GPU
|
| 30 |
def model_inference(
|
| 31 |
images, text, assistant_prefix= None, decoding_strategy = "Greedy", temperature= 0.4, max_new_tokens=512,
|
| 32 |
repetition_penalty=1.2, top_p=0.8
|
| 33 |
):
|
| 34 |
+
## Load idefics
|
| 35 |
+
id_processor = AutoProcessor.from_pretrained("HuggingFaceM4/Idefics3-8B-Llama3")
|
| 36 |
+
|
| 37 |
+
id_model = Idefics3ForConditionalGeneration.from_pretrained("HuggingFaceM4/Idefics3-8B-Llama3",
|
| 38 |
+
torch_dtype=torch.bfloat16,
|
| 39 |
+
#_attn_implementation="flash_attention_2"
|
| 40 |
+
).to("cuda")
|
| 41 |
+
|
| 42 |
+
BAD_WORDS_IDS = id_processor.tokenizer(["<image>", "<fake_token_around_image>"], add_special_tokens=False).input_ids
|
| 43 |
+
EOS_WORDS_IDS = [id_processor.tokenizer.eos_token_id]
|
| 44 |
print(type(images))
|
| 45 |
images = images[0]
|
| 46 |
print(type(images))
|
|
|
|
| 102 |
@spaces.GPU
|
| 103 |
def search(query: str, ds, images, k):
|
| 104 |
|
| 105 |
+
# Load colpali model
|
| 106 |
+
model_name = "vidore/colpali-v1.2"
|
| 107 |
+
token = os.environ.get("HF_TOKEN")
|
| 108 |
+
model = ColPali.from_pretrained(
|
| 109 |
+
"vidore/colpaligemma-3b-pt-448-base", torch_dtype=torch.bfloat16, device_map="cuda", token = token).eval()
|
| 110 |
+
|
| 111 |
+
model.load_adapter(model_name)
|
| 112 |
+
model = model.eval()
|
| 113 |
+
processor = AutoProcessor.from_pretrained(model_name, token = token)
|
| 114 |
+
|
| 115 |
+
mock_image = Image.new("RGB", (448, 448), (255, 255, 255))
|
| 116 |
+
|
| 117 |
device = "cuda:0" if torch.cuda.is_available() else "cpu"
|
| 118 |
if device != model.device:
|
| 119 |
model.to(device)
|