Spaces:
Runtime error
Runtime error
Clémentine
committed on
Commit
·
ead4c96
1
Parent(s):
bde3a6f
Fix tokenizer checks - much simpler
Browse files
src/submission/check_validity.py
CHANGED
|
@@ -7,7 +7,7 @@ from datetime import datetime, timedelta, timezone
|
|
| 7 |
import huggingface_hub
|
| 8 |
from huggingface_hub import ModelCard
|
| 9 |
from huggingface_hub.hf_api import ModelInfo
|
| 10 |
-
from transformers import AutoConfig
|
| 11 |
from transformers.models.auto.tokenization_auto import tokenizer_class_from_name, get_tokenizer_config
|
| 12 |
|
| 13 |
from src.envs import HAS_HIGHER_RATE_LIMIT
|
|
@@ -41,18 +41,12 @@ def is_model_on_hub(model_name: str, revision: str, token: str = None, trust_rem
|
|
| 41 |
try:
|
| 42 |
config = AutoConfig.from_pretrained(model_name, revision=revision, trust_remote_code=trust_remote_code, token=token)
|
| 43 |
if test_tokenizer:
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
else:
|
| 48 |
-
tokenizer_class_candidate = config.tokenizer_class
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
tokenizer_class = tokenizer_class_from_name(tokenizer_class_candidate)
|
| 52 |
-
if tokenizer_class is None:
|
| 53 |
return (
|
| 54 |
False,
|
| 55 |
-
f"uses
|
| 56 |
None
|
| 57 |
)
|
| 58 |
return True, None, config
|
|
|
|
| 7 |
import huggingface_hub
|
| 8 |
from huggingface_hub import ModelCard
|
| 9 |
from huggingface_hub.hf_api import ModelInfo
|
| 10 |
+
from transformers import AutoConfig, AutoTokenizer
|
| 11 |
from transformers.models.auto.tokenization_auto import tokenizer_class_from_name, get_tokenizer_config
|
| 12 |
|
| 13 |
from src.envs import HAS_HIGHER_RATE_LIMIT
|
|
|
|
| 41 |
try:
|
| 42 |
config = AutoConfig.from_pretrained(model_name, revision=revision, trust_remote_code=trust_remote_code, token=token)
|
| 43 |
if test_tokenizer:
|
| 44 |
+
try:
|
| 45 |
+
AutoTokenizer.from_pretrained(model_name, revision=revision, trust_remote_code=trust_remote_code, token=token)
|
| 46 |
+
except ValueError as e:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 47 |
return (
|
| 48 |
False,
|
| 49 |
+
f"uses a tokenizer which is not in a transformers release: {e}",
|
| 50 |
None
|
| 51 |
)
|
| 52 |
return True, None, config
|