Mahesh2841
/

777_test

@@ -1,19 +1,18 @@
 """
-SafeGenerationModel
--------------------
-Runtime-agnostic toxicity wrapper for ANY causal-LM on Hugging Face.
-Add in `config.json`:
-  "auto_map": {"AutoModelForCausalLM": "custom_modeling.SafeGenerationModel"}
-Requires:
-  • toxic.keras   –  a Keras model that outputs sigmoid-probability for "toxic"
-  • transformers  –  >= 4.38
-  • tensorflow    –  for the classifier
 """
 import importlib
-from types import MethodType
 from functools import lru_cache
 import torch
@@ -23,34 +22,33 @@ from huggingface_hub import hf_hub_download
 # ---------------------------------------------------------------------
-# 1)  MIXIN  – all toxicity logic lives here
 # ---------------------------------------------------------------------
 class _SafeGenerationMixin:
-    """
-    Mixin that overrides `generate()` to filter toxic prompts / outputs.
-    Must appear *before* the real base LM class in the MRO.
-    """
-    _toxicity_model = None           # lazy-loaded TF model
-    _tox_threshold = 0.6             # edit if needed
     _safe_message = (
         "Response is toxic, please be kind to yourself and others."
     )
     _tokenizer = None
-    # -------------------- utilities --------------------
     @property
     def _tox_model(self):
-        """Load the `.keras` model the first time we need it."""
         if self._toxicity_model is None:
             path = hf_hub_download(
                 repo_id=self.config.name_or_path,
                 filename="toxic.keras",
             )
-            self._toxicity_model = tf.keras.models.load_model(path, compile=False)
         return self._toxicity_model
-    def _load_tokenizer(self):
         if self._tokenizer is None:
             try:
                 self._tokenizer = transformers.AutoTokenizer.from_pretrained(
@@ -59,15 +57,16 @@ class _SafeGenerationMixin:
             except Exception:
                 pass
     def _is_toxic(self, text: str) -> bool:
         if not text.strip():
             return False
         prob = float(self._tox_model.predict([text])[0, 0])
         return prob >= self._tox_threshold
     def _safe_ids(self, length: int | None = None) -> torch.LongTensor:
-        """Return token IDs for the safe-message, padded / truncated to *length*."""
-        self._load_tokenizer()
         if self._tokenizer is None:
             raise RuntimeError("Tokenizer unavailable for safe-message encoding.")
         ids = self._tokenizer(self._safe_message, return_tensors="pt")["input_ids"][0]
@@ -86,92 +85,99 @@ class _SafeGenerationMixin:
                 ids = ids[:length]
         return ids.to(self.device)
-    # -------------------- override generate --------------------
     def generate(self, *args, **kwargs):
-        self._load_tokenizer()
-        # 1)  Decode prompt → toxicity check
-        prompt_text = None
         if self._tokenizer is not None:
             if "input_ids" in kwargs:
-                prompt_text = self._tokenizer.decode(
                     kwargs["input_ids"][0].tolist(), skip_special_tokens=True
                 )
             elif args:
-                prompt_text = self._tokenizer.decode(
                     args[0][0].tolist(), skip_special_tokens=True
                 )
-        if prompt_text and self._is_toxic(prompt_text):
             return self._safe_ids().unsqueeze(0)
-        # 2)  Normal generation  (super() == real LM class)
-        outputs = super().generate(*args, **kwargs)
-        # 3)  Toxicity check on completions
         if self._tokenizer is None:
-            return outputs  # cannot decode → skip
-        outs_cpu = outputs.detach().cpu()
-        safe_batches = []
-        for seq in outs_cpu:
             txt = self._tokenizer.decode(seq.tolist(), skip_special_tokens=True)
             if self._is_toxic(txt):
-                safe_batches.append(self._safe_ids(length=seq.size(0)))
             else:
-                safe_batches.append(seq)
-        return torch.stack(safe_batches, dim=0).to(self.device)
 # ---------------------------------------------------------------------
-# 2)  Helper: find the REAL base class for this config
 # ---------------------------------------------------------------------
 @lru_cache(None)
 def _get_base_cls(arch_name: str):
-    """
-    Map 'LlamaForCausalLM'  → transformers.LlamaForCausalLM (etc.).
-    Tries top-level attr first, then imports module heuristically.
-    """
     if hasattr(transformers, arch_name):
         return getattr(transformers, arch_name)
-    # Fallback: derive submodule from pattern `xxxForCausalLM`
     stem = arch_name.replace("ForCausalLM", "").lower()
-    module_try = f"transformers.models.{stem}.modeling_{stem}"
     try:
-        mod = importlib.import_module(module_try)
         return getattr(mod, arch_name)
     except Exception as e:
-        raise ValueError(
-            f"[SafeGeneration] Could not resolve base class for '{arch_name}': {e}"
-        ) from e
 # ---------------------------------------------------------------------
-# 3)  Dispatcher class – what HF actually instantiates
 # ---------------------------------------------------------------------
 class SafeGenerationModel:
     """
-    Factory / thin wrapper.  HF instantiates *this*, passing `config`.
-    We inspect `config.architectures[0]`, build a
-    (SafeMixin, RealBaseClass) dynamic subclass, and return an instance.
-    """
-    def __new__(cls, config, *args, **kwargs):
-        if not getattr(config, "architectures", None):
-            raise ValueError("`config.architectures` missing – cannot wrap model.")
-        base_cls = _get_base_cls(config.architectures[0])
-        # Build dynamic subclass only once per *base_cls* (memoised by lru_cache + closure)
-        DynamicSafeCls = _make_dynamic_cls(base_cls)
-        # Finally create and return the actual model instance
-        return DynamicSafeCls(config, *args, **kwargs)
-# -- internal cache to avoid re-creating identical classes ----------------
@lru_cache(None)
def _make_dynamic_cls(base_cls):
    """Create ``SafeGeneration_<Base>`` with bases ``(_SafeGenerationMixin, base_cls)``.

    ``lru_cache`` hands back the same class object for repeated calls with the
    same *base_cls*.
    """
    bases = (_SafeGenerationMixin, base_cls)
    return type(f"SafeGeneration_{base_cls.__name__}", bases, {})

 """
+custom_modeling.py
+------------------
+Model-agnostic toxicity wrapper for any Hugging Face causal-LM.
+Add (or keep) in your config.json:
+  "auto_map": {
+    "AutoModelForCausalLM": "custom_modeling.SafeGenerationModel"
+  }
+Files that must live in the repo alongside this script:
+  • toxic.keras  – Keras classifier (sigmoid output: toxic prob)
 """
 import importlib
 from functools import lru_cache
 import torch
 # ---------------------------------------------------------------------
+# 1)  MIXIN – all toxicity logic lives here
 # ---------------------------------------------------------------------
 class _SafeGenerationMixin:
+    """Mixin that overrides .generate() to filter toxic prompts / outputs."""
+    _toxicity_model = None
+    _tox_threshold = 0.6
     _safe_message = (
         "Response is toxic, please be kind to yourself and others."
     )
     _tokenizer = None
+    # ----- helper: load classifier on first use -----------------------
     @property
     def _tox_model(self):
         if self._toxicity_model is None:
             path = hf_hub_download(
                 repo_id=self.config.name_or_path,
                 filename="toxic.keras",
             )
+            self._toxicity_model = tf.keras.models.load_model(
+                path, compile=False
+            )
         return self._toxicity_model
+    # ----- helper: load tokenizer (once) ------------------------------
+    def _ensure_tokenizer(self):
         if self._tokenizer is None:
             try:
                 self._tokenizer = transformers.AutoTokenizer.from_pretrained(
             except Exception:
                 pass
+    # ----- helper: tox check -----------------------------------------
     def _is_toxic(self, text: str) -> bool:
         if not text.strip():
             return False
         prob = float(self._tox_model.predict([text])[0, 0])
         return prob >= self._tox_threshold
+    # ----- helper: safe token ids ------------------------------------
     def _safe_ids(self, length: int | None = None) -> torch.LongTensor:
+        self._ensure_tokenizer()
         if self._tokenizer is None:
             raise RuntimeError("Tokenizer unavailable for safe-message encoding.")
         ids = self._tokenizer(self._safe_message, return_tensors="pt")["input_ids"][0]
                 ids = ids[:length]
         return ids.to(self.device)
+    # ----- override generate() ---------------------------------------
     def generate(self, *args, **kwargs):
+        self._ensure_tokenizer()
+        # 1) prompt toxicity
+        prompt_txt = None
         if self._tokenizer is not None:
             if "input_ids" in kwargs:
+                prompt_txt = self._tokenizer.decode(
                     kwargs["input_ids"][0].tolist(), skip_special_tokens=True
                 )
             elif args:
+                prompt_txt = self._tokenizer.decode(
                     args[0][0].tolist(), skip_special_tokens=True
                 )
+        if prompt_txt and self._is_toxic(prompt_txt):
             return self._safe_ids().unsqueeze(0)
+        # 2) normal generation
+        output = super().generate(*args, **kwargs)
+        # 3) output toxicity
         if self._tokenizer is None:
+            return output
+        seqs = output.detach().cpu()
+        safe = []
+        for seq in seqs:
             txt = self._tokenizer.decode(seq.tolist(), skip_special_tokens=True)
             if self._is_toxic(txt):
+                safe.append(self._safe_ids(length=seq.size(0)))
             else:
+                safe.append(seq)
+        return torch.stack(safe, dim=0).to(self.device)
 # ---------------------------------------------------------------------
+# 2)  Resolve base class for the repo’s architecture string
 # ---------------------------------------------------------------------
 @lru_cache(None)
 def _get_base_cls(arch_name: str):
+    # direct attribute
     if hasattr(transformers, arch_name):
         return getattr(transformers, arch_name)
+    # heuristic import: e.g. LlamaForCausalLM -> transformers.models.llama.modeling_llama
     stem = arch_name.replace("ForCausalLM", "").lower()
+    module_path = f"transformers.models.{stem}.modeling_{stem}"
     try:
+        mod = importlib.import_module(module_path)
         return getattr(mod, arch_name)
     except Exception as e:
+        raise ValueError(f"Cannot resolve base class for '{arch_name}': {e}") from e
@lru_cache(None)
def _make_dynamic_cls(base_cls):
    """Return the cached class ``SafeGeneration_<Base>`` deriving from (mixin, base).

    The mixin is listed first so its ``generate()`` precedes the base class's
    in the MRO.
    """
    dynamic_name = f"SafeGeneration_{base_cls.__name__}"
    return type(dynamic_name, (_SafeGenerationMixin, base_cls), {})
 # ---------------------------------------------------------------------
+# 3)  Dispatcher class – target in `auto_map`
 # ---------------------------------------------------------------------
 class SafeGenerationModel:
     """
+    Thin dispatcher used by Hugging Face AutoClass.
+    It implements only `from_pretrained()`: determine the true base
+    architecture, build the dynamic subclass, and defer loading to it.
+    """
+    @classmethod
+    def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs):
+        # propagate trust_remote_code if caller set it
+        kwargs.setdefault("trust_remote_code", True)
+        # 1) load config to know arch string
+        config = transformers.AutoConfig.from_pretrained(
+            pretrained_model_name_or_path, **kwargs
+        )
+        if not getattr(config, "architectures", None):
+            raise ValueError("`config.architectures` missing; cannot wrap model.")
+        arch_name = config.architectures[0]
+        # 2) build / retrieve dynamic subclass
+        base_cls = _get_base_cls(arch_name)
+        SafeCls = _make_dynamic_cls(base_cls)
+        # 3) delegate full loading
+        return SafeCls.from_pretrained(
+            pretrained_model_name_or_path,
+            *model_args,
+            config=config,
+            **kwargs,
+        )