Upload folder using huggingface_hub

Browse files

Files changed (4)

config.json +1 -0
configuration_sm_subgroup_classifier.py +2 -1
example.py +4 -1
modeling_sm_subgroup_classifier.py +5 -22

config.json CHANGED Viewed

@@ -4,5 +4,6 @@
     "AutoConfig": "configuration_sm_subgroup_classifier.SmSubgroupClassifierConfig",
     "AutoModel": "modeling_sm_subgroup_classifier.SmSubgroupClassifier"
   },
   "model_type": "sm_subgroup_classifier"
 }

     "AutoConfig": "configuration_sm_subgroup_classifier.SmSubgroupClassifierConfig",
     "AutoModel": "modeling_sm_subgroup_classifier.SmSubgroupClassifier"
   },
+  "available_models": ["fi_na-nb"],
   "model_type": "sm_subgroup_classifier"
 }

configuration_sm_subgroup_classifier.py CHANGED Viewed

@@ -4,5 +4,6 @@ from transformers import PretrainedConfig
 class SmSubgroupClassifierConfig(PretrainedConfig):
     model_type = "sm_subgroup_classifier"
-    def __init__(self, **kwargs):
         super().__init__(**kwargs)

 class SmSubgroupClassifierConfig(PretrainedConfig):
     model_type = "sm_subgroup_classifier"
+    def __init__(self, available_models=None, **kwargs):
         super().__init__(**kwargs)
+        self.available_models = available_models or []

example.py CHANGED Viewed

@@ -6,10 +6,13 @@ sm_classifier = AutoModel.from_pretrained(
     "erikhenriksson/sm-subgroup-classifier", trust_remote_code=True
 )
 # create a random 1024 dimensional embedding
 embedding = np.random.rand(1024).astype(np.float32)
 # Use - model automatically discovers what's available
-result = sm_classifier("en", "OP-ob", embedding)
 print(f"Prediction: {result['predictions']['predicted_class']}")

     "erikhenriksson/sm-subgroup-classifier", trust_remote_code=True
 )
+available = sm_classifier._get_available_models()
+print(f"Available models on HF Hub: {available}")
 # create a random 1024 dimensional embedding
 embedding = np.random.rand(1024).astype(np.float32)
 # Use - model automatically discovers what's available
+result = sm_classifier("fi", "na-nb", embedding)
 print(f"Prediction: {result['predictions']['predicted_class']}")

modeling_sm_subgroup_classifier.py CHANGED Viewed

@@ -18,39 +18,22 @@ class SmSubgroupClassifier(PreTrainedModel):
         self._loaded_classifiers = {}
         self.model_dir = None
-    def _get_available_models(self):
-        """Discover available models by checking what directories exist"""
-        if not self.model_dir:
-            return []
-        available = []
-        if os.path.exists(self.model_dir):
-            for item in os.listdir(self.model_dir):
-                item_path = os.path.join(self.model_dir, item)
-                if os.path.isdir(item_path) and "_" in item:
-                    # Check if it has the required files
-                    required_files = ["model.pkl", "scaler.pkl", "metadata.pkl"]
-                    if all(
-                        os.path.exists(os.path.join(item_path, f))
-                        for f in required_files
-                    ):
-                        available.append(item)
-        return available
     def _load_classifier(self, model_key):
         """Load a specific classifier by model key (e.g., 'en_OP-ob')"""
         if model_key in self._loaded_classifiers:
             return self._loaded_classifiers[model_key]
-        available_models = self._get_available_models()
-        if model_key not in available_models:
             raise ValueError(
-                f"Model '{model_key}' not available. Available: {available_models}"
             )
         # Path to classifier
         classifier_path = os.path.join(self.model_dir, model_key)
         # Load components
         classifier = joblib.load(os.path.join(classifier_path, "model.pkl"))
         scaler = joblib.load(os.path.join(classifier_path, "scaler.pkl"))

         self._loaded_classifiers = {}
         self.model_dir = None
     def _load_classifier(self, model_key):
         """Load a specific classifier by model key (e.g., 'en_OP-ob')"""
         if model_key in self._loaded_classifiers:
             return self._loaded_classifiers[model_key]
+        if model_key not in self.config.available_models:
             raise ValueError(
+                f"Model '{model_key}' not available. Available: {self.config.available_models}"
             )
         # Path to classifier
         classifier_path = os.path.join(self.model_dir, model_key)
+        if not os.path.exists(classifier_path):
+            raise FileNotFoundError(f"Classifier not found at {classifier_path}")
         # Load components
         classifier = joblib.load(os.path.join(classifier_path, "model.pkl"))
         scaler = joblib.load(os.path.join(classifier_path, "scaler.pkl"))