Upload folder using huggingface_hub
Browse files
- config.json +1 -0
- configuration_sm_subgroup_classifier.py +2 -1
- example.py +4 -1
- modeling_sm_subgroup_classifier.py +5 -22
config.json
CHANGED
|
@@ -4,5 +4,6 @@
|
|
| 4 |
"AutoConfig": "configuration_sm_subgroup_classifier.SmSubgroupClassifierConfig",
|
| 5 |
"AutoModel": "modeling_sm_subgroup_classifier.SmSubgroupClassifier"
|
| 6 |
},
|
|
|
|
| 7 |
"model_type": "sm_subgroup_classifier"
|
| 8 |
}
|
|
|
|
| 4 |
"AutoConfig": "configuration_sm_subgroup_classifier.SmSubgroupClassifierConfig",
|
| 5 |
"AutoModel": "modeling_sm_subgroup_classifier.SmSubgroupClassifier"
|
| 6 |
},
|
| 7 |
+
"available_models": ["fi_na-nb"],
|
| 8 |
"model_type": "sm_subgroup_classifier"
|
| 9 |
}
|
configuration_sm_subgroup_classifier.py
CHANGED
|
@@ -4,5 +4,6 @@ from transformers import PretrainedConfig
|
|
| 4 |
class SmSubgroupClassifierConfig(PretrainedConfig):
|
| 5 |
model_type = "sm_subgroup_classifier"
|
| 6 |
|
| 7 |
-
def __init__(self, **kwargs):
|
| 8 |
super().__init__(**kwargs)
|
|
|
|
|
|
| 4 |
class SmSubgroupClassifierConfig(PretrainedConfig):
|
| 5 |
model_type = "sm_subgroup_classifier"
|
| 6 |
|
| 7 |
+
def __init__(self, available_models=None, **kwargs):
|
| 8 |
super().__init__(**kwargs)
|
| 9 |
+
self.available_models = available_models or []
|
example.py
CHANGED
|
@@ -6,10 +6,13 @@ sm_classifier = AutoModel.from_pretrained(
|
|
| 6 |
"erikhenriksson/sm-subgroup-classifier", trust_remote_code=True
|
| 7 |
)
|
| 8 |
|
|
|
|
|
|
|
|
|
|
| 9 |
# create a random 1024 dimensional embedding
|
| 10 |
|
| 11 |
embedding = np.random.rand(1024).astype(np.float32)
|
| 12 |
|
| 13 |
# Use - model automatically discovers what's available
|
| 14 |
-
result = sm_classifier("
|
| 15 |
print(f"Prediction: {result['predictions']['predicted_class']}")
|
|
|
|
| 6 |
"erikhenriksson/sm-subgroup-classifier", trust_remote_code=True
|
| 7 |
)
|
| 8 |
|
| 9 |
+
available = sm_classifier.config.available_models
|
| 10 |
+
print(f"Available models on HF Hub: {available}")
|
| 11 |
+
|
| 12 |
# create a random 1024 dimensional embedding
|
| 13 |
|
| 14 |
embedding = np.random.rand(1024).astype(np.float32)
|
| 15 |
|
| 16 |
# Use - model automatically discovers what's available
|
| 17 |
+
result = sm_classifier("fi", "na-nb", embedding)
|
| 18 |
print(f"Prediction: {result['predictions']['predicted_class']}")
|
modeling_sm_subgroup_classifier.py
CHANGED
|
@@ -18,39 +18,22 @@ class SmSubgroupClassifier(PreTrainedModel):
|
|
| 18 |
self._loaded_classifiers = {}
|
| 19 |
self.model_dir = None
|
| 20 |
|
| 21 |
-
def _get_available_models(self):
|
| 22 |
-
"""Discover available models by checking what directories exist"""
|
| 23 |
-
if not self.model_dir:
|
| 24 |
-
return []
|
| 25 |
-
|
| 26 |
-
available = []
|
| 27 |
-
if os.path.exists(self.model_dir):
|
| 28 |
-
for item in os.listdir(self.model_dir):
|
| 29 |
-
item_path = os.path.join(self.model_dir, item)
|
| 30 |
-
if os.path.isdir(item_path) and "_" in item:
|
| 31 |
-
# Check if it has the required files
|
| 32 |
-
required_files = ["model.pkl", "scaler.pkl", "metadata.pkl"]
|
| 33 |
-
if all(
|
| 34 |
-
os.path.exists(os.path.join(item_path, f))
|
| 35 |
-
for f in required_files
|
| 36 |
-
):
|
| 37 |
-
available.append(item)
|
| 38 |
-
return available
|
| 39 |
-
|
| 40 |
def _load_classifier(self, model_key):
|
| 41 |
"""Load a specific classifier by model key (e.g., 'en_OP-ob')"""
|
| 42 |
if model_key in self._loaded_classifiers:
|
| 43 |
return self._loaded_classifiers[model_key]
|
| 44 |
|
| 45 |
-
|
| 46 |
-
if model_key not in available_models:
|
| 47 |
raise ValueError(
|
| 48 |
-
f"Model '{model_key}' not available. Available: {available_models}"
|
| 49 |
)
|
| 50 |
|
| 51 |
# Path to classifier
|
| 52 |
classifier_path = os.path.join(self.model_dir, model_key)
|
| 53 |
|
|
|
|
|
|
|
|
|
|
| 54 |
# Load components
|
| 55 |
classifier = joblib.load(os.path.join(classifier_path, "model.pkl"))
|
| 56 |
scaler = joblib.load(os.path.join(classifier_path, "scaler.pkl"))
|
|
|
|
| 18 |
self._loaded_classifiers = {}
|
| 19 |
self.model_dir = None
|
| 20 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21 |
def _load_classifier(self, model_key):
|
| 22 |
"""Load a specific classifier by model key (e.g., 'en_OP-ob')"""
|
| 23 |
if model_key in self._loaded_classifiers:
|
| 24 |
return self._loaded_classifiers[model_key]
|
| 25 |
|
| 26 |
+
if model_key not in self.config.available_models:
|
|
|
|
| 27 |
raise ValueError(
|
| 28 |
+
f"Model '{model_key}' not available. Available: {self.config.available_models}"
|
| 29 |
)
|
| 30 |
|
| 31 |
# Path to classifier
|
| 32 |
classifier_path = os.path.join(self.model_dir, model_key)
|
| 33 |
|
| 34 |
+
if not os.path.exists(classifier_path):
|
| 35 |
+
raise FileNotFoundError(f"Classifier not found at {classifier_path}")
|
| 36 |
+
|
| 37 |
# Load components
|
| 38 |
classifier = joblib.load(os.path.join(classifier_path, "model.pkl"))
|
| 39 |
scaler = joblib.load(os.path.join(classifier_path, "scaler.pkl"))
|