Spaces:

wandb
/

guardrails-genie

Runtime error

App Files Community

ash0ts committed on Nov 29, 2024

Commit

f4fda1c

1 Parent(s): fcae57e

rename to be more appropriate and add better test cases for pii

Browse files

Files changed (13) hide show

guardrails_genie/guardrails/{pii → entity_recognition}/__init__.py +0 -0
guardrails_genie/guardrails/entity_recognition/llm_judge_entity_recognition_guardrail.py +3 -0
guardrails_genie/guardrails/{banned_terms/llm_judge.py → entity_recognition/pii_examples/pii_benchmark.py} +0 -0
guardrails_genie/guardrails/entity_recognition/pii_examples/pii_test_examples.py +150 -0
guardrails_genie/guardrails/entity_recognition/pii_examples/run_presidio_model.py +42 -0
guardrails_genie/guardrails/entity_recognition/pii_examples/run_regex_model.py +42 -0
guardrails_genie/guardrails/entity_recognition/pii_examples/run_transformers.py +43 -0
guardrails_genie/guardrails/{pii/presidio_pii_guardrail.py → entity_recognition/presidio_entity_recognition_guardrail.py} +27 -27
guardrails_genie/guardrails/{pii/regex_pii_guardrail.py → entity_recognition/regex_entity_recognition_guardrail.py} +26 -26
guardrails_genie/guardrails/{pii/transformers_pipeline_guardrail.py → entity_recognition/transformers_entity_recognition_guardrail.py} +34 -34
guardrails_genie/guardrails/pii/run_presidio_model.py +0 -36
guardrails_genie/guardrails/pii/run_regex_model.py +0 -21
guardrails_genie/guardrails/pii/run_transformers.py +0 -35

guardrails_genie/guardrails/{pii → entity_recognition}/__init__.py RENAMED Viewed

File without changes

guardrails_genie/guardrails/entity_recognition/llm_judge_entity_recognition_guardrail.py ADDED Viewed

	@@ -0,0 +1,3 @@

+## Word consistency
+# - Scent -> Odor
+# - odour -> Odor

guardrails_genie/guardrails/{banned_terms/llm_judge.py → entity_recognition/pii_examples/pii_benchmark.py} RENAMED Viewed

File without changes

guardrails_genie/guardrails/entity_recognition/pii_examples/pii_test_examples.py ADDED Viewed

	@@ -0,0 +1,150 @@

+"""
+Collection of PII test examples with expected outcomes for entity recognition testing.
+Each example includes the input text and expected entities to be detected.
+"""
+PII_TEST_EXAMPLES = [
+    {
+        "description": "Business Context - Employee Record",
+        "input_text": """
+Please update our records for employee John Smith:
+Email: [email protected]
+Phone: 123-456-7890
+SSN: 123-45-6789
+Emergency Contact: Mary Johnson (Tel: 098-765-4321)
+""",
+        "expected_entities": {
+            "GIVENNAME": ["John", "Mary"],
+            "SURNAME": ["Smith", "Johnson"],
+            "EMAIL": ["[email protected]"],
+            "PHONE_NUMBER": ["123-456-7890", "098-765-4321"],
+            "SOCIALNUM": ["123-45-6789"]
+        }
+    },
+    {
+        "description": "Meeting Notes with Attendees",
+        "input_text": """
+Meeting Notes - Project Alpha
+Date: 2024-03-15
+Attendees:
+- Sarah Williams ([email protected])
+- Robert Brown ([email protected])
+- Tom Wilson (555-0123-4567)
+Action Items:
+1. Sarah to review documentation
+2. Contact Bob at his alternate number: 777-888-9999
+""",
+        "expected_entities": {
+            "GIVENNAME": ["Sarah", "Robert", "Tom", "Bob"],
+            "SURNAME": ["Williams", "Brown", "Wilson"],
+            "EMAIL": ["[email protected]", "[email protected]"],
+            "PHONE_NUMBER": ["555-0123-4567", "777-888-9999"]
+        }
+    },
+    {
+        "description": "Medical Record",
+        "input_text": """
+Patient: Emma Thompson
+DOB: 05/15/1980
+Medical Record #: MR-12345
+Primary Care: Dr. James Wilson
+Contact: [email protected]
+Insurance ID: INS-987654321
+Emergency Contact: Michael Thompson (555-123-4567)
+""",
+        "expected_entities": {
+            "GIVENNAME": ["Emma", "James", "Michael"],
+            "SURNAME": ["Thompson", "Wilson", "Thompson"],
+            "EMAIL": ["[email protected]"],
+            "PHONE_NUMBER": ["555-123-4567"]
+        }
+    },
+    {
+        "description": "No PII Content",
+        "input_text": """
+Project Status Update:
+- All deliverables are on track
+- Budget is within limits
+- Next review scheduled for next week
+""",
+        "expected_entities": {}
+    },
+    {
+        "description": "Mixed Format Phone Numbers",
+        "input_text": """
+Contact Directory:
+Main Office: (555) 123-4567
+Support: 555.987.6543
+International: +1-555-321-7890
+Emergency: 555 444 3333
+""",
+        "expected_entities": {
+            "PHONE_NUMBER": [
+                "(555) 123-4567",
+                "555.987.6543",
+                "+1-555-321-7890",
+                "555 444 3333"
+            ]
+        }
+    }
+]
+# Additional examples can be added to test specific edge cases or formats
+EDGE_CASE_EXAMPLES = [
+    {
+        "description": "Mixed Case and Special Characters",
+        "input_text": """
+[email protected]
+[email protected]
+[email protected]
+""",
+        "expected_entities": {
+            "EMAIL": [
+                "[email protected]",
+                "[email protected]",
+                "[email protected]"
+            ],
+            "GIVENNAME": ["John", "Jane", "Bob"],
+            "SURNAME": ["Doe", "Smith", "Jones"]
+        }
+    }
+]
+def validate_entities(detected: dict, expected: dict) -> bool:
+    """Compare detected entities with expected entities"""
+    if set(detected.keys()) != set(expected.keys()):
+        return False
+    return all(set(detected[k]) == set(expected[k]) for k in expected.keys())
+def run_test_case(guardrail, test_case, test_type="Main"):
+    """Run a single test case and print results"""
+    print(f"\n{test_type} Test Case: {test_case['description']}")
+    print("-" * 50)
+    result = guardrail.guard(test_case['input_text'])
+    expected = test_case['expected_entities']
+    # Validate results
+    matches = validate_entities(result.detected_entities, expected)
+    print(f"Test Status: {'✓ PASS' if matches else '✗ FAIL'}")
+    print(f"Contains PII: {result.contains_entities}")
+    if not matches:
+        print("\nEntity Comparison:")
+        all_entity_types = set(list(result.detected_entities.keys()) + list(expected.keys()))
+        for entity_type in all_entity_types:
+            detected = set(result.detected_entities.get(entity_type, []))
+            expected_set = set(expected.get(entity_type, []))
+            print(f"\nEntity Type: {entity_type}")
+            print(f"  Expected: {sorted(expected_set)}")
+            print(f"  Detected: {sorted(detected)}")
+            if detected != expected_set:
+                print(f"  Missing: {sorted(expected_set - detected)}")
+                print(f"  Extra: {sorted(detected - expected_set)}")
+    if result.anonymized_text:
+        print(f"\nAnonymized Text:\n{result.anonymized_text}")
+    return matches

guardrails_genie/guardrails/entity_recognition/pii_examples/run_presidio_model.py ADDED Viewed

	@@ -0,0 +1,42 @@

+from guardrails_genie.guardrails.entity_recognition.presidio_entity_recognition_guardrail import PresidioEntityRecognitionGuardrail
+from guardrails_genie.guardrails.entity_recognition.pii_examples.pii_test_examples import PII_TEST_EXAMPLES, EDGE_CASE_EXAMPLES, run_test_case, validate_entities
+import weave
+def test_pii_detection():
+    """Test PII detection scenarios using predefined test cases"""
+    weave.init("guardrails-genie-pii-presidio-model")
+    # Create the guardrail with default entities and anonymization enabled
+    pii_guardrail = PresidioEntityRecognitionGuardrail(
+        should_anonymize=True,
+        show_available_entities=True
+    )
+    # Test statistics
+    total_tests = len(PII_TEST_EXAMPLES) + len(EDGE_CASE_EXAMPLES)
+    passed_tests = 0
+    # Test main PII examples
+    print("\nRunning Main PII Tests")
+    print("=" * 80)
+    for test_case in PII_TEST_EXAMPLES:
+        if run_test_case(pii_guardrail, test_case):
+            passed_tests += 1
+    # Test edge cases
+    print("\nRunning Edge Cases")
+    print("=" * 80)
+    for test_case in EDGE_CASE_EXAMPLES:
+        if run_test_case(pii_guardrail, test_case, "Edge"):
+            passed_tests += 1
+    # Print summary
+    print("\nTest Summary")
+    print("=" * 80)
+    print(f"Total Tests: {total_tests}")
+    print(f"Passed: {passed_tests}")
+    print(f"Failed: {total_tests - passed_tests}")
+    print(f"Success Rate: {(passed_tests/total_tests)*100:.1f}%")
+if __name__ == "__main__":
+    test_pii_detection()

guardrails_genie/guardrails/entity_recognition/pii_examples/run_regex_model.py ADDED Viewed

	@@ -0,0 +1,42 @@

+from guardrails_genie.guardrails.entity_recognition.regex_entity_recognition_guardrail import RegexEntityRecognitionGuardrail
+from guardrails_genie.guardrails.entity_recognition.pii_examples.pii_test_examples import PII_TEST_EXAMPLES, EDGE_CASE_EXAMPLES, run_test_case, validate_entities
+import weave
+def test_pii_detection():
+    """Test PII detection scenarios using predefined test cases"""
+    weave.init("guardrails-genie-pii-regex-model")
+    # Create the guardrail with default entities and anonymization enabled
+    pii_guardrail = RegexEntityRecognitionGuardrail(
+        should_anonymize=True,
+        show_available_entities=True
+    )
+    # Test statistics
+    total_tests = len(PII_TEST_EXAMPLES) + len(EDGE_CASE_EXAMPLES)
+    passed_tests = 0
+    # Test main PII examples
+    print("\nRunning Main PII Tests")
+    print("=" * 80)
+    for test_case in PII_TEST_EXAMPLES:
+        if run_test_case(pii_guardrail, test_case):
+            passed_tests += 1
+    # Test edge cases
+    print("\nRunning Edge Cases")
+    print("=" * 80)
+    for test_case in EDGE_CASE_EXAMPLES:
+        if run_test_case(pii_guardrail, test_case, "Edge"):
+            passed_tests += 1
+    # Print summary
+    print("\nTest Summary")
+    print("=" * 80)
+    print(f"Total Tests: {total_tests}")
+    print(f"Passed: {passed_tests}")
+    print(f"Failed: {total_tests - passed_tests}")
+    print(f"Success Rate: {(passed_tests/total_tests)*100:.1f}%")
+if __name__ == "__main__":
+    test_pii_detection()

guardrails_genie/guardrails/entity_recognition/pii_examples/run_transformers.py ADDED Viewed

	@@ -0,0 +1,43 @@

+from guardrails_genie.guardrails.entity_recognition.transformers_entity_recognition_guardrail import TransformersEntityRecognitionGuardrail
+from guardrails_genie.guardrails.entity_recognition.pii_examples.pii_test_examples import PII_TEST_EXAMPLES, EDGE_CASE_EXAMPLES, run_test_case, validate_entities
+import weave
+def test_pii_detection():
+    """Test PII detection scenarios using predefined test cases"""
+    weave.init("guardrails-genie-pii-transformers-pipeline-model")
+    # Create the guardrail with an explicit list of selected entities and anonymization enabled
+    pii_guardrail = TransformersEntityRecognitionGuardrail(
+        selected_entities=["GIVENNAME", "SURNAME", "EMAIL", "PHONE_NUMBER", "SOCIALNUM"],
+        should_anonymize=True,
+        show_available_entities=True
+    )
+    # Test statistics
+    total_tests = len(PII_TEST_EXAMPLES) + len(EDGE_CASE_EXAMPLES)
+    passed_tests = 0
+    # Test main PII examples
+    print("\nRunning Main PII Tests")
+    print("=" * 80)
+    for test_case in PII_TEST_EXAMPLES:
+        if run_test_case(pii_guardrail, test_case):
+            passed_tests += 1
+    # Test edge cases
+    print("\nRunning Edge Cases")
+    print("=" * 80)
+    for test_case in EDGE_CASE_EXAMPLES:
+        if run_test_case(pii_guardrail, test_case, "Edge"):
+            passed_tests += 1
+    # Print summary
+    print("\nTest Summary")
+    print("=" * 80)
+    print(f"Total Tests: {total_tests}")
+    print(f"Passed: {passed_tests}")
+    print(f"Failed: {total_tests - passed_tests}")
+    print(f"Success Rate: {(passed_tests/total_tests)*100:.1f}%")
+if __name__ == "__main__":
+    test_pii_detection()

guardrails_genie/guardrails/{pii/presidio_pii_guardrail.py → entity_recognition/presidio_entity_recognition_guardrail.py} RENAMED Viewed

@@ -7,19 +7,19 @@ from presidio_anonymizer import AnonymizerEngine
 from ..base import Guardrail
-class PresidioPIIGuardrailResponse(BaseModel):
-    contains_pii: bool
-    detected_pii_types: Dict[str, List[str]]
     explanation: str
     anonymized_text: Optional[str] = None
-class PresidioPIIGuardrailSimpleResponse(BaseModel):
-    contains_pii: bool
     explanation: str
     anonymized_text: Optional[str] = None
 #TODO: Add support for transformers workflow and not just Spacy
-class PresidioPIIGuardrail(Guardrail):
     @staticmethod
     def get_available_entities() -> List[str]:
         registry = RecognizerRegistry()
@@ -103,15 +103,15 @@ class PresidioPIIGuardrail(Guardrail):
         )
     @weave.op()
-    def guard(self, prompt: str, return_detected_types: bool = True, **kwargs) -> PresidioPIIGuardrailResponse | PresidioPIIGuardrailSimpleResponse:
         """
-        Check if the input prompt contains any PII using Presidio.
         Args:
             prompt: The text to analyze
-            return_detected_types: If True, returns detailed PII type information
         """
-        # Analyze text for PII
         analyzer_results = self.analyzer.analyze(
             text=prompt,
             entities=self.selected_entities,
@@ -119,31 +119,31 @@ class PresidioPIIGuardrail(Guardrail):
         )
         # Group results by entity type
-        detected_pii = {}
         for result in analyzer_results:
             entity_type = result.entity_type
             text_slice = prompt[result.start:result.end]
-            if entity_type not in detected_pii:
-                detected_pii[entity_type] = []
-            detected_pii[entity_type].append(text_slice)
         # Create explanation
         explanation_parts = []
-        if detected_pii:
-            explanation_parts.append("Found the following PII in the text:")
-            for pii_type, instances in detected_pii.items():
-                explanation_parts.append(f"- {pii_type}: {len(instances)} instance(s)")
         else:
-            explanation_parts.append("No PII detected in the text.")
         # Add information about what was checked
-        explanation_parts.append("\nChecked for these PII types:")
         for entity in self.selected_entities:
             explanation_parts.append(f"- {entity}")
         # Anonymize if requested
         anonymized_text = None
-        if self.should_anonymize and detected_pii:
             anonymized_result = self.anonymizer.anonymize(
                 text=prompt,
                 analyzer_results=analyzer_results
@@ -151,19 +151,19 @@ class PresidioPIIGuardrail(Guardrail):
             anonymized_text = anonymized_result.text
         if return_detected_types:
-            return PresidioPIIGuardrailResponse(
-                contains_pii=bool(detected_pii),
-                detected_pii_types=detected_pii,
                 explanation="\n".join(explanation_parts),
                 anonymized_text=anonymized_text
             )
         else:
-            return PresidioPIIGuardrailSimpleResponse(
-                contains_pii=bool(detected_pii),
                 explanation="\n".join(explanation_parts),
                 anonymized_text=anonymized_text
             )
     @weave.op()
-    def predict(self, prompt: str, return_detected_types: bool = True, **kwargs) -> PresidioPIIGuardrailResponse | PresidioPIIGuardrailSimpleResponse:
         return self.guard(prompt, return_detected_types=return_detected_types, **kwargs)

 from ..base import Guardrail
+class PresidioEntityRecognitionResponse(BaseModel):
+    contains_entities: bool
+    detected_entities: Dict[str, List[str]]
     explanation: str
     anonymized_text: Optional[str] = None
+class PresidioEntityRecognitionSimpleResponse(BaseModel):
+    contains_entities: bool
     explanation: str
     anonymized_text: Optional[str] = None
 #TODO: Add support for transformers workflow and not just Spacy
+class PresidioEntityRecognitionGuardrail(Guardrail):
     @staticmethod
     def get_available_entities() -> List[str]:
         registry = RecognizerRegistry()
         )
     @weave.op()
+    def guard(self, prompt: str, return_detected_types: bool = True, **kwargs) -> PresidioEntityRecognitionResponse | PresidioEntityRecognitionSimpleResponse:
         """
+        Check if the input prompt contains any entities using Presidio.
         Args:
             prompt: The text to analyze
+            return_detected_types: If True, returns detailed entity type information
         """
+        # Analyze text for entities
         analyzer_results = self.analyzer.analyze(
             text=prompt,
             entities=self.selected_entities,
         )
         # Group results by entity type
+        detected_entities = {}
         for result in analyzer_results:
             entity_type = result.entity_type
             text_slice = prompt[result.start:result.end]
+            if entity_type not in detected_entities:
+                detected_entities[entity_type] = []
+            detected_entities[entity_type].append(text_slice)
         # Create explanation
         explanation_parts = []
+        if detected_entities:
+            explanation_parts.append("Found the following entities in the text:")
+            for entity_type, instances in detected_entities.items():
+                explanation_parts.append(f"- {entity_type}: {len(instances)} instance(s)")
         else:
+            explanation_parts.append("No entities detected in the text.")
         # Add information about what was checked
+        explanation_parts.append("\nChecked for these entity types:")
         for entity in self.selected_entities:
             explanation_parts.append(f"- {entity}")
         # Anonymize if requested
         anonymized_text = None
+        if self.should_anonymize and detected_entities:
             anonymized_result = self.anonymizer.anonymize(
                 text=prompt,
                 analyzer_results=analyzer_results
             anonymized_text = anonymized_result.text
         if return_detected_types:
+            return PresidioEntityRecognitionResponse(
+                contains_entities=bool(detected_entities),
+                detected_entities=detected_entities,
                 explanation="\n".join(explanation_parts),
                 anonymized_text=anonymized_text
             )
         else:
+            return PresidioEntityRecognitionSimpleResponse(
+                contains_entities=bool(detected_entities),
                 explanation="\n".join(explanation_parts),
                 anonymized_text=anonymized_text
             )
     @weave.op()
+    def predict(self, prompt: str, return_detected_types: bool = True, **kwargs) -> PresidioEntityRecognitionResponse | PresidioEntityRecognitionSimpleResponse:
         return self.guard(prompt, return_detected_types=return_detected_types, **kwargs)

guardrails_genie/guardrails/{pii/regex_pii_guardrail.py → entity_recognition/regex_entity_recognition_guardrail.py} RENAMED Viewed

@@ -7,25 +7,25 @@ from ...regex_model import RegexModel
 from ..base import Guardrail
-class RegexPIIGuardrailResponse(BaseModel):
-    contains_pii: bool
-    detected_pii_types: Dict[str, list[str]]
     explanation: str
     anonymized_text: Optional[str] = None
-class RegexPIIGuardrailSimpleResponse(BaseModel):
-    contains_pii: bool
     explanation: str
     anonymized_text: Optional[str] = None
-class RegexPIIGuardrail(Guardrail):
     regex_model: RegexModel
     patterns: Dict[str, str] = {}
     should_anonymize: bool = False
-    DEFAULT_PII_PATTERNS: ClassVar[Dict[str, str]] = {
         "email": r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}",
         "phone_number": r"\b(?:\+?1[-.]?)?\(?(?:[0-9]{3})\)?[-.]?(?:[0-9]{3})[-.]?(?:[0-9]{4})\b",
         "ssn": r"\b\d{3}[-]?\d{2}[-]?\d{4}\b",
@@ -41,7 +41,7 @@ class RegexPIIGuardrail(Guardrail):
     def __init__(self, use_defaults: bool = True, should_anonymize: bool = False, **kwargs):
         patterns = {}
         if use_defaults:
-            patterns = self.DEFAULT_PII_PATTERNS.copy()
         if kwargs.get("patterns"):
             patterns.update(kwargs["patterns"])
@@ -56,30 +56,30 @@ class RegexPIIGuardrail(Guardrail):
         )
     @weave.op()
-    def guard(self, prompt: str, return_detected_types: bool = True, **kwargs) -> RegexPIIGuardrailResponse | RegexPIIGuardrailSimpleResponse:
         """
-        Check if the input prompt contains any PII based on the regex patterns.
         Args:
-            prompt: Input text to check for PII
-            return_detected_types: If True, returns detailed PII type information
         Returns:
-            RegexPIIGuardrailResponse or RegexPIIGuardrailSimpleResponse containing PII detection results
         """
         result = self.regex_model.check(prompt)
         # Create detailed explanation
         explanation_parts = []
         if result.matched_patterns:
-            explanation_parts.append("Found the following PII in the text:")
-            for pii_type, matches in result.matched_patterns.items():
-                explanation_parts.append(f"- {pii_type}: {len(matches)} instance(s)")
         else:
-            explanation_parts.append("No PII detected in the text.")
         if result.failed_patterns:
-            explanation_parts.append("\nChecked but did not find these PII types:")
             for pattern in result.failed_patterns:
                 explanation_parts.append(f"- {pattern}")
@@ -87,25 +87,25 @@ class RegexPIIGuardrail(Guardrail):
         anonymized_text = None
         if getattr(self, 'should_anonymize', False) and result.matched_patterns:
             anonymized_text = prompt
-            for pii_type, matches in result.matched_patterns.items():
                 for match in matches:
-                    replacement = f"[{pii_type.upper()}]"
                     anonymized_text = anonymized_text.replace(match, replacement)
         if return_detected_types:
-            return RegexPIIGuardrailResponse(
-                contains_pii=not result.passed,
-                detected_pii_types=result.matched_patterns,
                 explanation="\n".join(explanation_parts),
                 anonymized_text=anonymized_text
             )
         else:
-            return RegexPIIGuardrailSimpleResponse(
-                contains_pii=not result.passed,
                 explanation="\n".join(explanation_parts),
                 anonymized_text=anonymized_text
             )
     @weave.op()
-    def predict(self, prompt: str, return_detected_types: bool = True, **kwargs) -> RegexPIIGuardrailResponse | RegexPIIGuardrailSimpleResponse:
         return self.guard(prompt, return_detected_types=return_detected_types, **kwargs)

 from ..base import Guardrail
+class RegexEntityRecognitionResponse(BaseModel):
+    contains_entities: bool
+    detected_entities: Dict[str, list[str]]
     explanation: str
     anonymized_text: Optional[str] = None
+class RegexEntityRecognitionSimpleResponse(BaseModel):
+    contains_entities: bool
     explanation: str
     anonymized_text: Optional[str] = None
+class RegexEntityRecognitionGuardrail(Guardrail):
     regex_model: RegexModel
     patterns: Dict[str, str] = {}
     should_anonymize: bool = False
+    DEFAULT_PATTERNS: ClassVar[Dict[str, str]] = {
         "email": r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}",
         "phone_number": r"\b(?:\+?1[-.]?)?\(?(?:[0-9]{3})\)?[-.]?(?:[0-9]{3})[-.]?(?:[0-9]{4})\b",
         "ssn": r"\b\d{3}[-]?\d{2}[-]?\d{4}\b",
     def __init__(self, use_defaults: bool = True, should_anonymize: bool = False, **kwargs):
         patterns = {}
         if use_defaults:
+            patterns = self.DEFAULT_PATTERNS.copy()
         if kwargs.get("patterns"):
             patterns.update(kwargs["patterns"])
         )
     @weave.op()
+    def guard(self, prompt: str, return_detected_types: bool = True, **kwargs) -> RegexEntityRecognitionResponse | RegexEntityRecognitionSimpleResponse:
         """
+        Check if the input prompt contains any entities based on the regex patterns.
         Args:
+            prompt: Input text to check for entities
+            return_detected_types: If True, returns detailed entity type information
         Returns:
+            RegexEntityRecognitionResponse or RegexEntityRecognitionSimpleResponse containing detection results
         """
         result = self.regex_model.check(prompt)
         # Create detailed explanation
         explanation_parts = []
         if result.matched_patterns:
+            explanation_parts.append("Found the following entities in the text:")
+            for entity_type, matches in result.matched_patterns.items():
+                explanation_parts.append(f"- {entity_type}: {len(matches)} instance(s)")
         else:
+            explanation_parts.append("No entities detected in the text.")
         if result.failed_patterns:
+            explanation_parts.append("\nChecked but did not find these entity types:")
             for pattern in result.failed_patterns:
                 explanation_parts.append(f"- {pattern}")
         anonymized_text = None
         if getattr(self, 'should_anonymize', False) and result.matched_patterns:
             anonymized_text = prompt
+            for entity_type, matches in result.matched_patterns.items():
                 for match in matches:
+                    replacement = f"[{entity_type.upper()}]"
                     anonymized_text = anonymized_text.replace(match, replacement)
         if return_detected_types:
+            return RegexEntityRecognitionResponse(
+                contains_entities=not result.passed,
+                detected_entities=result.matched_patterns,
                 explanation="\n".join(explanation_parts),
                 anonymized_text=anonymized_text
             )
         else:
+            return RegexEntityRecognitionSimpleResponse(
+                contains_entities=not result.passed,
                 explanation="\n".join(explanation_parts),
                 anonymized_text=anonymized_text
             )
     @weave.op()
+    def predict(self, prompt: str, return_detected_types: bool = True, **kwargs) -> RegexEntityRecognitionResponse | RegexEntityRecognitionSimpleResponse:
         return self.guard(prompt, return_detected_types=return_detected_types, **kwargs)

guardrails_genie/guardrails/{pii/transformers_pipeline_guardrail.py → entity_recognition/transformers_entity_recognition_guardrail.py} RENAMED Viewed

@@ -5,19 +5,19 @@ from pydantic import BaseModel
 from ..base import Guardrail
 import weave
-class TransformersPipelinePIIGuardrailResponse(BaseModel):
-    contains_pii: bool
-    detected_pii_types: Dict[str, List[str]]
     explanation: str
     anonymized_text: Optional[str] = None
-class TransformersPipelinePIIGuardrailSimpleResponse(BaseModel):
-    contains_pii: bool
     explanation: str
     anonymized_text: Optional[str] = None
-class TransformersPipelinePIIGuardrail(Guardrail):
-    """Generic guardrail for detecting PII using any token classification model."""
     _pipeline: Optional[object] = None
     selected_entities: List[str]
@@ -82,7 +82,7 @@ class TransformersPipelinePIIGuardrail(Guardrail):
     def _print_available_entities(self, entities: List[str]):
         """Print all available entity types that can be detected by the model."""
-        print("\nAvailable PII entity types:")
         print("=" * 25)
         for entity in entities:
             print(f"- {entity}")
@@ -92,23 +92,23 @@ class TransformersPipelinePIIGuardrail(Guardrail):
         """Print all available entity types that can be detected by the model."""
         self._print_available_entities(self.available_entities)
-    def _detect_pii(self, text: str) -> Dict[str, List[str]]:
-        """Detect PII entities in the text using the pipeline."""
         results = self._pipeline(text)
         # Group findings by entity type
-        detected_pii = {}
         for entity in results:
             entity_type = entity['entity_group']
             if entity_type in self.selected_entities:
-                if entity_type not in detected_pii:
-                    detected_pii[entity_type] = []
-                detected_pii[entity_type].append(entity['word'])
-        return detected_pii
     def _anonymize_text(self, text: str, aggregate_redaction: bool = True) -> str:
-        """Anonymize detected PII in text using the pipeline."""
         results = self._pipeline(text)
         # Sort entities by start position in reverse order to avoid offset issues
@@ -131,49 +131,49 @@ class TransformersPipelinePIIGuardrail(Guardrail):
         return ' '.join(result.split())
     @weave.op()
-    def guard(self, prompt: str, return_detected_types: bool = True, aggregate_redaction: bool = True) -> TransformersPipelinePIIGuardrailResponse | TransformersPipelinePIIGuardrailSimpleResponse:
-        """Check if the input prompt contains any PII using Piiranha.
         Args:
             prompt: The text to analyze
-            return_detected_types: If True, returns detailed PII type information
             aggregate_redaction: If True, uses generic [redacted] instead of entity type
         """
-        # Detect PII
-        detected_pii = self._detect_pii(prompt)
         # Create explanation
         explanation_parts = []
-        if detected_pii:
-            explanation_parts.append("Found the following PII in the text:")
-            for pii_type, instances in detected_pii.items():
-                explanation_parts.append(f"- {pii_type}: {len(instances)} instance(s)")
         else:
-            explanation_parts.append("No PII detected in the text.")
-        explanation_parts.append("\nChecked for these PII types:")
         for entity in self.selected_entities:
             explanation_parts.append(f"- {entity}")
         # Anonymize if requested
         anonymized_text = None
-        if self.should_anonymize and detected_pii:
             anonymized_text = self._anonymize_text(prompt, aggregate_redaction)
         if return_detected_types:
-            return TransformersPipelinePIIGuardrailResponse(
-                contains_pii=bool(detected_pii),
-                detected_pii_types=detected_pii,
                 explanation="\n".join(explanation_parts),
                 anonymized_text=anonymized_text
             )
         else:
-            return TransformersPipelinePIIGuardrailSimpleResponse(
-                contains_pii=bool(detected_pii),
                 explanation="\n".join(explanation_parts),
                 anonymized_text=anonymized_text
             )
     @weave.op()
-    def predict(self, prompt: str, return_detected_types: bool = True, aggregate_redaction: bool = True, **kwargs) -> TransformersPipelinePIIGuardrailResponse | TransformersPipelinePIIGuardrailSimpleResponse:
         return self.guard(prompt, return_detected_types=return_detected_types, aggregate_redaction=aggregate_redaction, **kwargs)

 from ..base import Guardrail
 import weave
+class TransformersEntityRecognitionResponse(BaseModel):
+    contains_entities: bool
+    detected_entities: Dict[str, List[str]]
     explanation: str
     anonymized_text: Optional[str] = None
+class TransformersEntityRecognitionSimpleResponse(BaseModel):
+    contains_entities: bool
     explanation: str
     anonymized_text: Optional[str] = None
+class TransformersEntityRecognitionGuardrail(Guardrail):
+    """Generic guardrail for detecting entities using any token classification model."""
     _pipeline: Optional[object] = None
     selected_entities: List[str]
     def _print_available_entities(self, entities: List[str]):
         """Print all available entity types that can be detected by the model."""
+        print("\nAvailable entity types:")
         print("=" * 25)
         for entity in entities:
             print(f"- {entity}")
         """Print all available entity types that can be detected by the model."""
         self._print_available_entities(self.available_entities)
+    def _detect_entities(self, text: str) -> Dict[str, List[str]]:
+        """Detect entities in the text using the pipeline."""
         results = self._pipeline(text)
         # Group findings by entity type
+        detected_entities = {}
         for entity in results:
             entity_type = entity['entity_group']
             if entity_type in self.selected_entities:
+                if entity_type not in detected_entities:
+                    detected_entities[entity_type] = []
+                detected_entities[entity_type].append(entity['word'])
+        return detected_entities
     def _anonymize_text(self, text: str, aggregate_redaction: bool = True) -> str:
+        """Anonymize detected entities in text using the pipeline."""
         results = self._pipeline(text)
         # Sort entities by start position in reverse order to avoid offset issues
         return ' '.join(result.split())
     @weave.op()
+    def guard(self, prompt: str, return_detected_types: bool = True, aggregate_redaction: bool = True) -> TransformersEntityRecognitionResponse | TransformersEntityRecognitionSimpleResponse:
+        """Check if the input prompt contains any entities using the transformer pipeline.
         Args:
             prompt: The text to analyze
+            return_detected_types: If True, returns detailed entity type information
             aggregate_redaction: If True, uses generic [redacted] instead of entity type
         """
+        # Detect entities
+        detected_entities = self._detect_entities(prompt)
         # Create explanation
         explanation_parts = []
+        if detected_entities:
+            explanation_parts.append("Found the following entities in the text:")
+            for entity_type, instances in detected_entities.items():
+                explanation_parts.append(f"- {entity_type}: {len(instances)} instance(s)")
         else:
+            explanation_parts.append("No entities detected in the text.")
+        explanation_parts.append("\nChecked for these entities:")
         for entity in self.selected_entities:
             explanation_parts.append(f"- {entity}")
         # Anonymize if requested
         anonymized_text = None
+        if self.should_anonymize and detected_entities:
             anonymized_text = self._anonymize_text(prompt, aggregate_redaction)
         if return_detected_types:
+            return TransformersEntityRecognitionResponse(
+                contains_entities=bool(detected_entities),
+                detected_entities=detected_entities,
                 explanation="\n".join(explanation_parts),
                 anonymized_text=anonymized_text
             )
         else:
+            return TransformersEntityRecognitionSimpleResponse(
+                contains_entities=bool(detected_entities),
                 explanation="\n".join(explanation_parts),
                 anonymized_text=anonymized_text
             )
     @weave.op()
+    def predict(self, prompt: str, return_detected_types: bool = True, aggregate_redaction: bool = True, **kwargs) -> TransformersEntityRecognitionResponse | TransformersEntityRecognitionSimpleResponse:
         return self.guard(prompt, return_detected_types=return_detected_types, aggregate_redaction=aggregate_redaction, **kwargs)

guardrails_genie/guardrails/pii/run_presidio_model.py DELETED Viewed

@@ -1,36 +0,0 @@
-from guardrails_genie.guardrails.pii.presidio_pii_guardrail import PresidioPIIGuardrail
-import weave
-def run_presidio_model():
-    weave.init("guardrails-genie-pii-presidio-model")
-    # Create the guardrail with an explicit list of three entity types (PERSON, EMAIL_ADDRESS, PHONE_NUMBER) and anonymization enabled
-    pii_guardrail = PresidioPIIGuardrail(
-        selected_entities=["PERSON", "EMAIL_ADDRESS", "PHONE_NUMBER"],
-        should_anonymize=True
-    )
-    # Check a prompt (NOTE(review): "[email protected]" looks like an e-mail-obfuscation artifact of the page scrape, not a real address -- restore a real example address before reusing this script)
-    prompt = "Please contact [email protected] or call 123-456-7890. My SSN is 123-45-6789"
-    result = pii_guardrail.guard(prompt)
-    print(result)
-    # Expected result, as described by the original author (not verified here):
-    # - contains_pii: True
-    # - detected_pii_types: {
-    #     "EMAIL_ADDRESS": ["[email protected]"],
-    #     "PHONE_NUMBER": ["123-456-7890"],
-    #     "US_SSN": ["123-45-6789"]  (NOTE(review): US_SSN is not in selected_entities above -- confirm it is actually reported)
-    # }
-    # - safe_to_process: False
-    # - explanation: Detailed explanation of findings
-    # - anonymized_text: "Please contact <EMAIL_ADDRESS> or call <PHONE_NUMBER>. My SSN is <US_SSN>"
-    # Second example: a prompt with no PII, run through the same guardrail instance
-    safe_prompt = "The weather is nice today"
-    safe_result = pii_guardrail.guard(safe_prompt)
-    print("\nSafe prompt result:")
-    print(safe_result)
-if __name__ == "__main__":
-    run_presidio_model()

guardrails_genie/guardrails/pii/run_regex_model.py DELETED Viewed

@@ -1,21 +0,0 @@
-from guardrails_genie.guardrails.pii.regex_pii_guardrail import RegexPIIGuardrail
-import weave
-def run_regex_model():
-    weave.init("guardrails-genie-pii-regex-model")
-    # Create the guardrail with use_defaults=True and anonymization requested
-    pii_guardrail = RegexPIIGuardrail(use_defaults=True, should_anonymize=True)
-    # Check a prompt (NOTE(review): "[email protected]" looks like an e-mail-obfuscation artifact of the page scrape, not a real address -- restore a real example address before reusing this script)
-    prompt = "Please contact [email protected] or call 123-456-7890"
-    result = pii_guardrail.guard(prompt)
-    print(result)
-    # Expected result, as described by the original author (not verified here):
-    # - contains_pii: True
-    # - detected_pii_types: {"email": ["[email protected]"], "phone_number": ["123-456-7890"]}
-    # - safe_to_process: False
-    # - explanation: Detailed explanation of findings
-if __name__ == "__main__":
-    run_regex_model()

guardrails_genie/guardrails/pii/run_transformers.py DELETED Viewed

@@ -1,35 +0,0 @@
-from guardrails_genie.guardrails.pii.transformers_pipeline_guardrail import TransformersPipelinePIIGuardrail
-import weave
-def run_transformers_pipeline():
-    weave.init("guardrails-genie-pii-transformers-pipeline-model")
-    # Create the guardrail with an explicit list of six entity labels, anonymization enabled, and show_available_entities=True
-    pii_guardrail = TransformersPipelinePIIGuardrail(
-        selected_entities=["GIVENNAME", "SURNAME", "EMAIL", "TELEPHONENUM", "SOCIALNUM", "PHONE_NUMBER"],
-        should_anonymize=True,
-        model_name="lakshyakh93/deberta_finetuned_pii",
-        show_available_entities=True
-    )
-    # Check a prompt (NOTE(review): "[email protected]" looks like an e-mail-obfuscation artifact of the page scrape, not a real address -- restore a real example address before reusing this script)
-    prompt = "Please contact John Smith at [email protected] or call 123-456-7890. My SSN is 123-45-6789"
-    result = pii_guardrail.guard(prompt, aggregate_redaction=False)
-    print(result)
-    # Expected result, as described by the original author (not verified here):
-    # - contains_pii: True
-    # - detected_pii_types: {
-    #     "GIVENNAME": ["John"],
-    #     "SURNAME": ["Smith"],
-    #     "EMAIL": ["[email protected]"],
-    #     "TELEPHONENUM": ["123-456-7890"],
-    #     "SOCIALNUM": ["123-45-6789"]
-    # }
-    # - safe_to_process: False
-    # - explanation: Detailed explanation of findings
-    # - anonymized_text: "Please contact [redacted] [redacted] at [redacted] or call [redacted]. My SSN is [redacted]" (NOTE(review): guard() is called with aggregate_redaction=False above, so the placeholders may be entity-type labels rather than the generic [redacted] -- confirm)
-if __name__ == "__main__":
-    run_transformers_pipeline()