Spaces:
Runtime error
Runtime error
Fix ViT-H builder
Browse files
- app.py +1 -1
- tokenize_anything/models/easy_build.py +2 -0
app.py
CHANGED
|
@@ -31,7 +31,7 @@ from tokenize_anything.utils.image import im_vstack
|
|
| 31 |
def parse_args():
|
| 32 |
"""Parse arguments."""
|
| 33 |
parser = argparse.ArgumentParser(description="Launch gradio application")
|
| 34 |
-
parser.add_argument("--model-type", type=str, default="
|
| 35 |
parser.add_argument("--checkpoint", type=str, default="models/tap_vit_h_v1_1.pkl")
|
| 36 |
parser.add_argument("--concept", type=str, default="concepts/merged_2560.pkl")
|
| 37 |
parser.add_argument("--device", nargs="+", type=int, default=[0], help="Index of devices")
|
|
|
|
| 31 |
def parse_args():
|
| 32 |
"""Parse arguments."""
|
| 33 |
parser = argparse.ArgumentParser(description="Launch gradio application")
|
| 34 |
+
parser.add_argument("--model-type", type=str, default="tap_vit_h")
|
| 35 |
parser.add_argument("--checkpoint", type=str, default="models/tap_vit_h_v1_1.pkl")
|
| 36 |
parser.add_argument("--concept", type=str, default="concepts/merged_2560.pkl")
|
| 37 |
parser.add_argument("--device", nargs="+", type=int, default=[0], help="Index of devices")
|
tokenize_anything/models/easy_build.py
CHANGED
|
@@ -106,8 +106,10 @@ def image_tokenizer(image_encoder, checkpoint=None, device=0, dtype="float16", *
|
|
| 106 |
|
| 107 |
vit_b_encoder = partial(vit_encoder, depth=12, embed_dim=768, num_heads=12)
|
| 108 |
vit_l_encoder = partial(vit_encoder, depth=24, embed_dim=1024, num_heads=16)
|
|
|
|
| 109 |
|
| 110 |
model_registry = {
|
| 111 |
"tap_vit_b": partial(image_tokenizer, image_encoder=vit_b_encoder),
|
| 112 |
"tap_vit_l": partial(image_tokenizer, image_encoder=vit_l_encoder),
|
|
|
|
| 113 |
}
|
|
|
|
| 106 |
|
| 107 |
vit_b_encoder = partial(vit_encoder, depth=12, embed_dim=768, num_heads=12)
|
| 108 |
vit_l_encoder = partial(vit_encoder, depth=24, embed_dim=1024, num_heads=16)
|
| 109 |
+
vit_h_encoder = partial(vit_encoder, depth=32, embed_dim=1280, num_heads=16)
|
| 110 |
|
| 111 |
model_registry = {
|
| 112 |
"tap_vit_b": partial(image_tokenizer, image_encoder=vit_b_encoder),
|
| 113 |
"tap_vit_l": partial(image_tokenizer, image_encoder=vit_l_encoder),
|
| 114 |
+
"tap_vit_h": partial(image_tokenizer, image_encoder=vit_h_encoder),
|
| 115 |
}
|