Spaces:
Runtime error
Runtime error
rename to be more appropriate and add better test cases for pii
Browse files- guardrails_genie/guardrails/{pii → entity_recognition}/__init__.py +0 -0
- guardrails_genie/guardrails/entity_recognition/llm_judge_entity_recognition_guardrail.py +3 -0
- guardrails_genie/guardrails/{banned_terms/llm_judge.py → entity_recognition/pii_examples/pii_benchmark.py} +0 -0
- guardrails_genie/guardrails/entity_recognition/pii_examples/pii_test_examples.py +150 -0
- guardrails_genie/guardrails/entity_recognition/pii_examples/run_presidio_model.py +42 -0
- guardrails_genie/guardrails/entity_recognition/pii_examples/run_regex_model.py +42 -0
- guardrails_genie/guardrails/entity_recognition/pii_examples/run_transformers.py +43 -0
- guardrails_genie/guardrails/{pii/presidio_pii_guardrail.py → entity_recognition/presidio_entity_recognition_guardrail.py} +27 -27
- guardrails_genie/guardrails/{pii/regex_pii_guardrail.py → entity_recognition/regex_entity_recognition_guardrail.py} +26 -26
- guardrails_genie/guardrails/{pii/transformers_pipeline_guardrail.py → entity_recognition/transformers_entity_recognition_guardrail.py} +34 -34
- guardrails_genie/guardrails/pii/run_presidio_model.py +0 -36
- guardrails_genie/guardrails/pii/run_regex_model.py +0 -21
- guardrails_genie/guardrails/pii/run_transformers.py +0 -35
guardrails_genie/guardrails/{pii → entity_recognition}/__init__.py
RENAMED
|
File without changes
|
guardrails_genie/guardrails/entity_recognition/llm_judge_entity_recognition_guardrail.py
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
## Word consistency
|
| 2 |
+
# - Scent -> Odor
|
| 3 |
+
# - odour -> Odor
|
guardrails_genie/guardrails/{banned_terms/llm_judge.py → entity_recognition/pii_examples/pii_benchmark.py}
RENAMED
|
File without changes
|
guardrails_genie/guardrails/entity_recognition/pii_examples/pii_test_examples.py
ADDED
|
@@ -0,0 +1,150 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Collection of PII test examples with expected outcomes for entity recognition testing.
|
| 3 |
+
Each example includes the input text and expected entities to be detected.
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
PII_TEST_EXAMPLES = [
|
| 7 |
+
{
|
| 8 |
+
"description": "Business Context - Employee Record",
|
| 9 |
+
"input_text": """
|
| 10 |
+
Please update our records for employee John Smith:
|
| 11 |
+
Email: [email protected]
|
| 12 |
+
Phone: 123-456-7890
|
| 13 |
+
SSN: 123-45-6789
|
| 14 |
+
Emergency Contact: Mary Johnson (Tel: 098-765-4321)
|
| 15 |
+
""",
|
| 16 |
+
"expected_entities": {
|
| 17 |
+
"GIVENNAME": ["John", "Mary"],
|
| 18 |
+
"SURNAME": ["Smith", "Johnson"],
|
| 19 |
+
"EMAIL": ["[email protected]"],
|
| 20 |
+
"PHONE_NUMBER": ["123-456-7890", "098-765-4321"],
|
| 21 |
+
"SOCIALNUM": ["123-45-6789"]
|
| 22 |
+
}
|
| 23 |
+
},
|
| 24 |
+
{
|
| 25 |
+
"description": "Meeting Notes with Attendees",
|
| 26 |
+
"input_text": """
|
| 27 |
+
Meeting Notes - Project Alpha
|
| 28 |
+
Date: 2024-03-15
|
| 29 |
+
Attendees:
|
| 30 |
+
- Sarah Williams ([email protected])
|
| 31 |
+
- Robert Brown ([email protected])
|
| 32 |
+
- Tom Wilson (555-0123-4567)
|
| 33 |
+
|
| 34 |
+
Action Items:
|
| 35 |
+
1. Sarah to review documentation
|
| 36 |
+
2. Contact Bob at his alternate number: 777-888-9999
|
| 37 |
+
""",
|
| 38 |
+
"expected_entities": {
|
| 39 |
+
"GIVENNAME": ["Sarah", "Robert", "Tom", "Bob"],
|
| 40 |
+
"SURNAME": ["Williams", "Brown", "Wilson"],
|
| 41 |
+
"EMAIL": ["[email protected]", "[email protected]"],
|
| 42 |
+
"PHONE_NUMBER": ["555-0123-4567", "777-888-9999"]
|
| 43 |
+
}
|
| 44 |
+
},
|
| 45 |
+
{
|
| 46 |
+
"description": "Medical Record",
|
| 47 |
+
"input_text": """
|
| 48 |
+
Patient: Emma Thompson
|
| 49 |
+
DOB: 05/15/1980
|
| 50 |
+
Medical Record #: MR-12345
|
| 51 |
+
Primary Care: Dr. James Wilson
|
| 52 |
+
Contact: [email protected]
|
| 53 |
+
Insurance ID: INS-987654321
|
| 54 |
+
Emergency Contact: Michael Thompson (555-123-4567)
|
| 55 |
+
""",
|
| 56 |
+
"expected_entities": {
|
| 57 |
+
"GIVENNAME": ["Emma", "James", "Michael"],
|
| 58 |
+
"SURNAME": ["Thompson", "Wilson", "Thompson"],
|
| 59 |
+
"EMAIL": ["[email protected]"],
|
| 60 |
+
"PHONE_NUMBER": ["555-123-4567"]
|
| 61 |
+
}
|
| 62 |
+
},
|
| 63 |
+
{
|
| 64 |
+
"description": "No PII Content",
|
| 65 |
+
"input_text": """
|
| 66 |
+
Project Status Update:
|
| 67 |
+
- All deliverables are on track
|
| 68 |
+
- Budget is within limits
|
| 69 |
+
- Next review scheduled for next week
|
| 70 |
+
""",
|
| 71 |
+
"expected_entities": {}
|
| 72 |
+
},
|
| 73 |
+
{
|
| 74 |
+
"description": "Mixed Format Phone Numbers",
|
| 75 |
+
"input_text": """
|
| 76 |
+
Contact Directory:
|
| 77 |
+
Main Office: (555) 123-4567
|
| 78 |
+
Support: 555.987.6543
|
| 79 |
+
International: +1-555-321-7890
|
| 80 |
+
Emergency: 555 444 3333
|
| 81 |
+
""",
|
| 82 |
+
"expected_entities": {
|
| 83 |
+
"PHONE_NUMBER": [
|
| 84 |
+
"(555) 123-4567",
|
| 85 |
+
"555.987.6543",
|
| 86 |
+
"+1-555-321-7890",
|
| 87 |
+
"555 444 3333"
|
| 88 |
+
]
|
| 89 |
+
}
|
| 90 |
+
}
|
| 91 |
+
]
|
| 92 |
+
|
| 93 |
+
# Additional examples can be added to test specific edge cases or formats
|
| 94 |
+
EDGE_CASE_EXAMPLES = [
|
| 95 |
+
{
|
| 96 |
+
"description": "Mixed Case and Special Characters",
|
| 97 |
+
"input_text": """
|
| 98 | |
| 99 | |
| 100 | |
| 101 |
+
""",
|
| 102 |
+
"expected_entities": {
|
| 103 |
+
"EMAIL": [
|
| 104 |
+
"[email protected]",
|
| 105 |
+
"[email protected]",
|
| 106 | |
| 107 |
+
],
|
| 108 |
+
"GIVENNAME": ["John", "Jane", "Bob"],
|
| 109 |
+
"SURNAME": ["Doe", "Smith", "Jones"]
|
| 110 |
+
}
|
| 111 |
+
}
|
| 112 |
+
]
|
| 113 |
+
|
| 114 |
+
def validate_entities(detected: dict, expected: dict) -> bool:
    """Return True when the detected entities match the expected ones.

    Both arguments map an entity-type name to the list of text spans found
    for that type. Spans are compared as sets, so ordering and duplicates
    are ignored. An entity type mapped to an empty list is treated the same
    as a type that is absent, so ``{"EMAIL": []}`` matches ``{}``.

    Args:
        detected: Entities reported by the guardrail (may be None).
        expected: Entities the test case expects (may be None).

    Returns:
        True if both mappings describe the same spans for the same types.
    """

    def _normalize(entities):
        # Drop types without any span so "nothing found" compares equal
        # whether or not the key is present in the mapping.
        return {
            entity_type: set(spans)
            for entity_type, spans in (entities or {}).items()
            if spans
        }

    return _normalize(detected) == _normalize(expected)
|
| 119 |
+
|
| 120 |
+
def run_test_case(guardrail, test_case, test_type="Main"):
    """Run one test case against a guardrail and print a report.

    Args:
        guardrail: Object exposing ``guard(text)``. The returned result is
            read for its ``detected_entities``, ``contains_entities`` and
            ``anonymized_text`` attributes.
        test_case: Dict with ``description``, ``input_text`` and
            ``expected_entities`` keys.
        test_type: Label printed in the header (e.g. "Main" or "Edge").

    Returns:
        True if the detected entities match the expected ones.
    """
    print(f"\n{test_type} Test Case: {test_case['description']}")
    print("-" * 50)

    result = guardrail.guard(test_case['input_text'])
    expected = test_case['expected_entities']
    detected_entities = result.detected_entities

    # Validate results
    matches = validate_entities(detected_entities, expected)

    # Distinct glyphs for the two outcomes so PASS and FAIL are
    # distinguishable at a glance.
    print(f"Test Status: {'✓ PASS' if matches else '✗ FAIL'}")
    print(f"Contains PII: {result.contains_entities}")

    if not matches:
        print("\nEntity Comparison:")
        # Sort the union of entity types so the report order is stable
        # from run to run (plain set iteration order is not).
        for entity_type in sorted(set(detected_entities) | set(expected)):
            detected = set(detected_entities.get(entity_type, []))
            expected_set = set(expected.get(entity_type, []))
            print(f"\nEntity Type: {entity_type}")
            print(f"  Expected: {sorted(expected_set)}")
            print(f"  Detected: {sorted(detected)}")
            if detected != expected_set:
                print(f"  Missing: {sorted(expected_set - detected)}")
                print(f"  Extra: {sorted(detected - expected_set)}")

    if result.anonymized_text:
        print(f"\nAnonymized Text:\n{result.anonymized_text}")

    return matches
|
guardrails_genie/guardrails/entity_recognition/pii_examples/run_presidio_model.py
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from guardrails_genie.guardrails.entity_recognition.presidio_entity_recognition_guardrail import PresidioEntityRecognitionGuardrail
|
| 2 |
+
from guardrails_genie.guardrails.entity_recognition.pii_examples.pii_test_examples import PII_TEST_EXAMPLES, EDGE_CASE_EXAMPLES, run_test_case, validate_entities
|
| 3 |
+
import weave
|
| 4 |
+
|
| 5 |
+
def test_pii_detection():
    """Run all PII examples through the Presidio guardrail and print a summary.

    Initialises a weave project, builds the guardrail with anonymization
    enabled, runs the main and edge-case examples through ``run_test_case``
    and prints total / passed / failed counts plus the success rate.
    """
    weave.init("guardrails-genie-pii-presidio-model")

    # No explicit entity selection is passed; anonymization is enabled
    pii_guardrail = PresidioEntityRecognitionGuardrail(
        should_anonymize=True,
        show_available_entities=True
    )

    # (heading, label passed to run_test_case, examples)
    suites = (
        ("Running Main PII Tests", "Main", PII_TEST_EXAMPLES),
        ("Running Edge Cases", "Edge", EDGE_CASE_EXAMPLES),
    )

    # Test statistics
    total_tests = sum(len(cases) for _, _, cases in suites)
    passed_tests = 0

    for heading, label, cases in suites:
        print(f"\n{heading}")
        print("=" * 80)
        for test_case in cases:
            if run_test_case(pii_guardrail, test_case, label):
                passed_tests += 1

    # Guard the ratio so empty example lists do not raise ZeroDivisionError
    success_rate = (passed_tests / total_tests) * 100 if total_tests else 0.0

    # Print summary
    print("\nTest Summary")
    print("=" * 80)
    print(f"Total Tests: {total_tests}")
    print(f"Passed: {passed_tests}")
    print(f"Failed: {total_tests - passed_tests}")
    print(f"Success Rate: {success_rate:.1f}%")


if __name__ == "__main__":
    test_pii_detection()
|
guardrails_genie/guardrails/entity_recognition/pii_examples/run_regex_model.py
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from guardrails_genie.guardrails.entity_recognition.regex_entity_recognition_guardrail import RegexEntityRecognitionGuardrail
|
| 2 |
+
from guardrails_genie.guardrails.entity_recognition.pii_examples.pii_test_examples import PII_TEST_EXAMPLES, EDGE_CASE_EXAMPLES, run_test_case, validate_entities
|
| 3 |
+
import weave
|
| 4 |
+
|
| 5 |
+
def test_pii_detection():
    """Run all PII examples through the regex guardrail and print a summary.

    Initialises a weave project, builds the guardrail with anonymization
    enabled, runs the main and edge-case examples through ``run_test_case``
    and prints total / passed / failed counts plus the success rate.
    """
    weave.init("guardrails-genie-pii-regex-model")

    # No explicit patterns are passed; anonymization is enabled
    pii_guardrail = RegexEntityRecognitionGuardrail(
        should_anonymize=True,
        show_available_entities=True
    )

    # (heading, label passed to run_test_case, examples)
    suites = (
        ("Running Main PII Tests", "Main", PII_TEST_EXAMPLES),
        ("Running Edge Cases", "Edge", EDGE_CASE_EXAMPLES),
    )

    # Test statistics
    total_tests = sum(len(cases) for _, _, cases in suites)
    passed_tests = 0

    for heading, label, cases in suites:
        print(f"\n{heading}")
        print("=" * 80)
        for test_case in cases:
            if run_test_case(pii_guardrail, test_case, label):
                passed_tests += 1

    # Guard the ratio so empty example lists do not raise ZeroDivisionError
    success_rate = (passed_tests / total_tests) * 100 if total_tests else 0.0

    # Print summary
    print("\nTest Summary")
    print("=" * 80)
    print(f"Total Tests: {total_tests}")
    print(f"Passed: {passed_tests}")
    print(f"Failed: {total_tests - passed_tests}")
    print(f"Success Rate: {success_rate:.1f}%")


if __name__ == "__main__":
    test_pii_detection()
|
guardrails_genie/guardrails/entity_recognition/pii_examples/run_transformers.py
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from guardrails_genie.guardrails.entity_recognition.transformers_entity_recognition_guardrail import TransformersEntityRecognitionGuardrail
|
| 2 |
+
from guardrails_genie.guardrails.entity_recognition.pii_examples.pii_test_examples import PII_TEST_EXAMPLES, EDGE_CASE_EXAMPLES, run_test_case, validate_entities
|
| 3 |
+
import weave
|
| 4 |
+
|
| 5 |
+
def test_pii_detection():
    """Run all PII examples through the transformers guardrail and print a summary.

    Initialises a weave project, builds the guardrail restricted to the
    entity types used by the examples (with anonymization enabled), runs the
    main and edge-case examples through ``run_test_case`` and prints
    total / passed / failed counts plus the success rate.
    """
    weave.init("guardrails-genie-pii-transformers-pipeline-model")

    # Restrict detection to the entity types the examples expect;
    # anonymization is enabled
    pii_guardrail = TransformersEntityRecognitionGuardrail(
        selected_entities=["GIVENNAME", "SURNAME", "EMAIL", "PHONE_NUMBER", "SOCIALNUM"],
        should_anonymize=True,
        show_available_entities=True
    )

    # (heading, label passed to run_test_case, examples)
    suites = (
        ("Running Main PII Tests", "Main", PII_TEST_EXAMPLES),
        ("Running Edge Cases", "Edge", EDGE_CASE_EXAMPLES),
    )

    # Test statistics
    total_tests = sum(len(cases) for _, _, cases in suites)
    passed_tests = 0

    for heading, label, cases in suites:
        print(f"\n{heading}")
        print("=" * 80)
        for test_case in cases:
            if run_test_case(pii_guardrail, test_case, label):
                passed_tests += 1

    # Guard the ratio so empty example lists do not raise ZeroDivisionError
    success_rate = (passed_tests / total_tests) * 100 if total_tests else 0.0

    # Print summary
    print("\nTest Summary")
    print("=" * 80)
    print(f"Total Tests: {total_tests}")
    print(f"Passed: {passed_tests}")
    print(f"Failed: {total_tests - passed_tests}")
    print(f"Success Rate: {success_rate:.1f}%")


if __name__ == "__main__":
    test_pii_detection()
|
guardrails_genie/guardrails/{pii/presidio_pii_guardrail.py → entity_recognition/presidio_entity_recognition_guardrail.py}
RENAMED
|
@@ -7,19 +7,19 @@ from presidio_anonymizer import AnonymizerEngine
|
|
| 7 |
|
| 8 |
from ..base import Guardrail
|
| 9 |
|
| 10 |
-
class
|
| 11 |
-
|
| 12 |
-
|
| 13 |
explanation: str
|
| 14 |
anonymized_text: Optional[str] = None
|
| 15 |
|
| 16 |
-
class
|
| 17 |
-
|
| 18 |
explanation: str
|
| 19 |
anonymized_text: Optional[str] = None
|
| 20 |
|
| 21 |
#TODO: Add support for transformers workflow and not just Spacy
|
| 22 |
-
class
|
| 23 |
@staticmethod
|
| 24 |
def get_available_entities() -> List[str]:
|
| 25 |
registry = RecognizerRegistry()
|
|
@@ -103,15 +103,15 @@ class PresidioPIIGuardrail(Guardrail):
|
|
| 103 |
)
|
| 104 |
|
| 105 |
@weave.op()
|
| 106 |
-
def guard(self, prompt: str, return_detected_types: bool = True, **kwargs) ->
|
| 107 |
"""
|
| 108 |
-
Check if the input prompt contains any
|
| 109 |
|
| 110 |
Args:
|
| 111 |
prompt: The text to analyze
|
| 112 |
-
return_detected_types: If True, returns detailed
|
| 113 |
"""
|
| 114 |
-
# Analyze text for
|
| 115 |
analyzer_results = self.analyzer.analyze(
|
| 116 |
text=prompt,
|
| 117 |
entities=self.selected_entities,
|
|
@@ -119,31 +119,31 @@ class PresidioPIIGuardrail(Guardrail):
|
|
| 119 |
)
|
| 120 |
|
| 121 |
# Group results by entity type
|
| 122 |
-
|
| 123 |
for result in analyzer_results:
|
| 124 |
entity_type = result.entity_type
|
| 125 |
text_slice = prompt[result.start:result.end]
|
| 126 |
-
if entity_type not in
|
| 127 |
-
|
| 128 |
-
|
| 129 |
|
| 130 |
# Create explanation
|
| 131 |
explanation_parts = []
|
| 132 |
-
if
|
| 133 |
-
explanation_parts.append("Found the following
|
| 134 |
-
for
|
| 135 |
-
explanation_parts.append(f"- {
|
| 136 |
else:
|
| 137 |
-
explanation_parts.append("No
|
| 138 |
|
| 139 |
# Add information about what was checked
|
| 140 |
-
explanation_parts.append("\nChecked for these
|
| 141 |
for entity in self.selected_entities:
|
| 142 |
explanation_parts.append(f"- {entity}")
|
| 143 |
|
| 144 |
# Anonymize if requested
|
| 145 |
anonymized_text = None
|
| 146 |
-
if self.should_anonymize and
|
| 147 |
anonymized_result = self.anonymizer.anonymize(
|
| 148 |
text=prompt,
|
| 149 |
analyzer_results=analyzer_results
|
|
@@ -151,19 +151,19 @@ class PresidioPIIGuardrail(Guardrail):
|
|
| 151 |
anonymized_text = anonymized_result.text
|
| 152 |
|
| 153 |
if return_detected_types:
|
| 154 |
-
return
|
| 155 |
-
|
| 156 |
-
|
| 157 |
explanation="\n".join(explanation_parts),
|
| 158 |
anonymized_text=anonymized_text
|
| 159 |
)
|
| 160 |
else:
|
| 161 |
-
return
|
| 162 |
-
|
| 163 |
explanation="\n".join(explanation_parts),
|
| 164 |
anonymized_text=anonymized_text
|
| 165 |
)
|
| 166 |
|
| 167 |
@weave.op()
|
| 168 |
-
def predict(self, prompt: str, return_detected_types: bool = True, **kwargs) ->
|
| 169 |
return self.guard(prompt, return_detected_types=return_detected_types, **kwargs)
|
|
|
|
| 7 |
|
| 8 |
from ..base import Guardrail
|
| 9 |
|
| 10 |
+
class PresidioEntityRecognitionResponse(BaseModel):
|
| 11 |
+
contains_entities: bool
|
| 12 |
+
detected_entities: Dict[str, List[str]]
|
| 13 |
explanation: str
|
| 14 |
anonymized_text: Optional[str] = None
|
| 15 |
|
| 16 |
+
class PresidioEntityRecognitionSimpleResponse(BaseModel):
|
| 17 |
+
contains_entities: bool
|
| 18 |
explanation: str
|
| 19 |
anonymized_text: Optional[str] = None
|
| 20 |
|
| 21 |
#TODO: Add support for transformers workflow and not just Spacy
|
| 22 |
+
class PresidioEntityRecognitionGuardrail(Guardrail):
|
| 23 |
@staticmethod
|
| 24 |
def get_available_entities() -> List[str]:
|
| 25 |
registry = RecognizerRegistry()
|
|
|
|
| 103 |
)
|
| 104 |
|
| 105 |
@weave.op()
|
| 106 |
+
def guard(self, prompt: str, return_detected_types: bool = True, **kwargs) -> PresidioEntityRecognitionResponse | PresidioEntityRecognitionSimpleResponse:
|
| 107 |
"""
|
| 108 |
+
Check if the input prompt contains any entities using Presidio.
|
| 109 |
|
| 110 |
Args:
|
| 111 |
prompt: The text to analyze
|
| 112 |
+
return_detected_types: If True, returns detailed entity type information
|
| 113 |
"""
|
| 114 |
+
# Analyze text for entities
|
| 115 |
analyzer_results = self.analyzer.analyze(
|
| 116 |
text=prompt,
|
| 117 |
entities=self.selected_entities,
|
|
|
|
| 119 |
)
|
| 120 |
|
| 121 |
# Group results by entity type
|
| 122 |
+
detected_entities = {}
|
| 123 |
for result in analyzer_results:
|
| 124 |
entity_type = result.entity_type
|
| 125 |
text_slice = prompt[result.start:result.end]
|
| 126 |
+
if entity_type not in detected_entities:
|
| 127 |
+
detected_entities[entity_type] = []
|
| 128 |
+
detected_entities[entity_type].append(text_slice)
|
| 129 |
|
| 130 |
# Create explanation
|
| 131 |
explanation_parts = []
|
| 132 |
+
if detected_entities:
|
| 133 |
+
explanation_parts.append("Found the following entities in the text:")
|
| 134 |
+
for entity_type, instances in detected_entities.items():
|
| 135 |
+
explanation_parts.append(f"- {entity_type}: {len(instances)} instance(s)")
|
| 136 |
else:
|
| 137 |
+
explanation_parts.append("No entities detected in the text.")
|
| 138 |
|
| 139 |
# Add information about what was checked
|
| 140 |
+
explanation_parts.append("\nChecked for these entity types:")
|
| 141 |
for entity in self.selected_entities:
|
| 142 |
explanation_parts.append(f"- {entity}")
|
| 143 |
|
| 144 |
# Anonymize if requested
|
| 145 |
anonymized_text = None
|
| 146 |
+
if self.should_anonymize and detected_entities:
|
| 147 |
anonymized_result = self.anonymizer.anonymize(
|
| 148 |
text=prompt,
|
| 149 |
analyzer_results=analyzer_results
|
|
|
|
| 151 |
anonymized_text = anonymized_result.text
|
| 152 |
|
| 153 |
if return_detected_types:
|
| 154 |
+
return PresidioEntityRecognitionResponse(
|
| 155 |
+
contains_entities=bool(detected_entities),
|
| 156 |
+
detected_entities=detected_entities,
|
| 157 |
explanation="\n".join(explanation_parts),
|
| 158 |
anonymized_text=anonymized_text
|
| 159 |
)
|
| 160 |
else:
|
| 161 |
+
return PresidioEntityRecognitionSimpleResponse(
|
| 162 |
+
contains_entities=bool(detected_entities),
|
| 163 |
explanation="\n".join(explanation_parts),
|
| 164 |
anonymized_text=anonymized_text
|
| 165 |
)
|
| 166 |
|
| 167 |
@weave.op()
|
| 168 |
+
def predict(self, prompt: str, return_detected_types: bool = True, **kwargs) -> PresidioEntityRecognitionResponse | PresidioEntityRecognitionSimpleResponse:
|
| 169 |
return self.guard(prompt, return_detected_types=return_detected_types, **kwargs)
|
guardrails_genie/guardrails/{pii/regex_pii_guardrail.py → entity_recognition/regex_entity_recognition_guardrail.py}
RENAMED
|
@@ -7,25 +7,25 @@ from ...regex_model import RegexModel
|
|
| 7 |
from ..base import Guardrail
|
| 8 |
|
| 9 |
|
| 10 |
-
class
|
| 11 |
-
|
| 12 |
-
|
| 13 |
explanation: str
|
| 14 |
anonymized_text: Optional[str] = None
|
| 15 |
|
| 16 |
|
| 17 |
-
class
|
| 18 |
-
|
| 19 |
explanation: str
|
| 20 |
anonymized_text: Optional[str] = None
|
| 21 |
|
| 22 |
|
| 23 |
-
class
|
| 24 |
regex_model: RegexModel
|
| 25 |
patterns: Dict[str, str] = {}
|
| 26 |
should_anonymize: bool = False
|
| 27 |
|
| 28 |
-
|
| 29 |
"email": r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}",
|
| 30 |
"phone_number": r"\b(?:\+?1[-.]?)?\(?(?:[0-9]{3})\)?[-.]?(?:[0-9]{3})[-.]?(?:[0-9]{4})\b",
|
| 31 |
"ssn": r"\b\d{3}[-]?\d{2}[-]?\d{4}\b",
|
|
@@ -41,7 +41,7 @@ class RegexPIIGuardrail(Guardrail):
|
|
| 41 |
def __init__(self, use_defaults: bool = True, should_anonymize: bool = False, **kwargs):
|
| 42 |
patterns = {}
|
| 43 |
if use_defaults:
|
| 44 |
-
patterns = self.
|
| 45 |
if kwargs.get("patterns"):
|
| 46 |
patterns.update(kwargs["patterns"])
|
| 47 |
|
|
@@ -56,30 +56,30 @@ class RegexPIIGuardrail(Guardrail):
|
|
| 56 |
)
|
| 57 |
|
| 58 |
@weave.op()
|
| 59 |
-
def guard(self, prompt: str, return_detected_types: bool = True, **kwargs) ->
|
| 60 |
"""
|
| 61 |
-
Check if the input prompt contains any
|
| 62 |
|
| 63 |
Args:
|
| 64 |
-
prompt: Input text to check for
|
| 65 |
-
return_detected_types: If True, returns detailed
|
| 66 |
|
| 67 |
Returns:
|
| 68 |
-
|
| 69 |
"""
|
| 70 |
result = self.regex_model.check(prompt)
|
| 71 |
|
| 72 |
# Create detailed explanation
|
| 73 |
explanation_parts = []
|
| 74 |
if result.matched_patterns:
|
| 75 |
-
explanation_parts.append("Found the following
|
| 76 |
-
for
|
| 77 |
-
explanation_parts.append(f"- {
|
| 78 |
else:
|
| 79 |
-
explanation_parts.append("No
|
| 80 |
|
| 81 |
if result.failed_patterns:
|
| 82 |
-
explanation_parts.append("\nChecked but did not find these
|
| 83 |
for pattern in result.failed_patterns:
|
| 84 |
explanation_parts.append(f"- {pattern}")
|
| 85 |
|
|
@@ -87,25 +87,25 @@ class RegexPIIGuardrail(Guardrail):
|
|
| 87 |
anonymized_text = None
|
| 88 |
if getattr(self, 'should_anonymize', False) and result.matched_patterns:
|
| 89 |
anonymized_text = prompt
|
| 90 |
-
for
|
| 91 |
for match in matches:
|
| 92 |
-
replacement = f"[{
|
| 93 |
anonymized_text = anonymized_text.replace(match, replacement)
|
| 94 |
|
| 95 |
if return_detected_types:
|
| 96 |
-
return
|
| 97 |
-
|
| 98 |
-
|
| 99 |
explanation="\n".join(explanation_parts),
|
| 100 |
anonymized_text=anonymized_text
|
| 101 |
)
|
| 102 |
else:
|
| 103 |
-
return
|
| 104 |
-
|
| 105 |
explanation="\n".join(explanation_parts),
|
| 106 |
anonymized_text=anonymized_text
|
| 107 |
)
|
| 108 |
|
| 109 |
@weave.op()
|
| 110 |
-
def predict(self, prompt: str, return_detected_types: bool = True, **kwargs) ->
|
| 111 |
return self.guard(prompt, return_detected_types=return_detected_types, **kwargs)
|
|
|
|
| 7 |
from ..base import Guardrail
|
| 8 |
|
| 9 |
|
| 10 |
+
class RegexEntityRecognitionResponse(BaseModel):
|
| 11 |
+
contains_entities: bool
|
| 12 |
+
detected_entities: Dict[str, list[str]]
|
| 13 |
explanation: str
|
| 14 |
anonymized_text: Optional[str] = None
|
| 15 |
|
| 16 |
|
| 17 |
+
class RegexEntityRecognitionSimpleResponse(BaseModel):
|
| 18 |
+
contains_entities: bool
|
| 19 |
explanation: str
|
| 20 |
anonymized_text: Optional[str] = None
|
| 21 |
|
| 22 |
|
| 23 |
+
class RegexEntityRecognitionGuardrail(Guardrail):
|
| 24 |
regex_model: RegexModel
|
| 25 |
patterns: Dict[str, str] = {}
|
| 26 |
should_anonymize: bool = False
|
| 27 |
|
| 28 |
+
DEFAULT_PATTERNS: ClassVar[Dict[str, str]] = {
|
| 29 |
"email": r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}",
|
| 30 |
"phone_number": r"\b(?:\+?1[-.]?)?\(?(?:[0-9]{3})\)?[-.]?(?:[0-9]{3})[-.]?(?:[0-9]{4})\b",
|
| 31 |
"ssn": r"\b\d{3}[-]?\d{2}[-]?\d{4}\b",
|
|
|
|
| 41 |
def __init__(self, use_defaults: bool = True, should_anonymize: bool = False, **kwargs):
|
| 42 |
patterns = {}
|
| 43 |
if use_defaults:
|
| 44 |
+
patterns = self.DEFAULT_PATTERNS.copy()
|
| 45 |
if kwargs.get("patterns"):
|
| 46 |
patterns.update(kwargs["patterns"])
|
| 47 |
|
|
|
|
| 56 |
)
|
| 57 |
|
| 58 |
@weave.op()
|
| 59 |
+
def guard(self, prompt: str, return_detected_types: bool = True, **kwargs) -> RegexEntityRecognitionResponse | RegexEntityRecognitionSimpleResponse:
|
| 60 |
"""
|
| 61 |
+
Check if the input prompt contains any entities based on the regex patterns.
|
| 62 |
|
| 63 |
Args:
|
| 64 |
+
prompt: Input text to check for entities
|
| 65 |
+
return_detected_types: If True, returns detailed entity type information
|
| 66 |
|
| 67 |
Returns:
|
| 68 |
+
RegexEntityRecognitionResponse or RegexEntityRecognitionSimpleResponse containing detection results
|
| 69 |
"""
|
| 70 |
result = self.regex_model.check(prompt)
|
| 71 |
|
| 72 |
# Create detailed explanation
|
| 73 |
explanation_parts = []
|
| 74 |
if result.matched_patterns:
|
| 75 |
+
explanation_parts.append("Found the following entities in the text:")
|
| 76 |
+
for entity_type, matches in result.matched_patterns.items():
|
| 77 |
+
explanation_parts.append(f"- {entity_type}: {len(matches)} instance(s)")
|
| 78 |
else:
|
| 79 |
+
explanation_parts.append("No entities detected in the text.")
|
| 80 |
|
| 81 |
if result.failed_patterns:
|
| 82 |
+
explanation_parts.append("\nChecked but did not find these entity types:")
|
| 83 |
for pattern in result.failed_patterns:
|
| 84 |
explanation_parts.append(f"- {pattern}")
|
| 85 |
|
|
|
|
| 87 |
anonymized_text = None
|
| 88 |
if getattr(self, 'should_anonymize', False) and result.matched_patterns:
|
| 89 |
anonymized_text = prompt
|
| 90 |
+
for entity_type, matches in result.matched_patterns.items():
|
| 91 |
for match in matches:
|
| 92 |
+
replacement = f"[{entity_type.upper()}]"
|
| 93 |
anonymized_text = anonymized_text.replace(match, replacement)
|
| 94 |
|
| 95 |
if return_detected_types:
|
| 96 |
+
return RegexEntityRecognitionResponse(
|
| 97 |
+
contains_entities=not result.passed,
|
| 98 |
+
detected_entities=result.matched_patterns,
|
| 99 |
explanation="\n".join(explanation_parts),
|
| 100 |
anonymized_text=anonymized_text
|
| 101 |
)
|
| 102 |
else:
|
| 103 |
+
return RegexEntityRecognitionSimpleResponse(
|
| 104 |
+
contains_entities=not result.passed,
|
| 105 |
explanation="\n".join(explanation_parts),
|
| 106 |
anonymized_text=anonymized_text
|
| 107 |
)
|
| 108 |
|
| 109 |
@weave.op()
|
| 110 |
+
def predict(self, prompt: str, return_detected_types: bool = True, **kwargs) -> RegexEntityRecognitionResponse | RegexEntityRecognitionSimpleResponse:
|
| 111 |
return self.guard(prompt, return_detected_types=return_detected_types, **kwargs)
|
guardrails_genie/guardrails/{pii/transformers_pipeline_guardrail.py → entity_recognition/transformers_entity_recognition_guardrail.py}
RENAMED
|
@@ -5,19 +5,19 @@ from pydantic import BaseModel
|
|
| 5 |
from ..base import Guardrail
|
| 6 |
import weave
|
| 7 |
|
| 8 |
-
class
|
| 9 |
-
|
| 10 |
-
|
| 11 |
explanation: str
|
| 12 |
anonymized_text: Optional[str] = None
|
| 13 |
|
| 14 |
-
class
|
| 15 |
-
|
| 16 |
explanation: str
|
| 17 |
anonymized_text: Optional[str] = None
|
| 18 |
|
| 19 |
-
class
|
| 20 |
-
"""Generic guardrail for detecting
|
| 21 |
|
| 22 |
_pipeline: Optional[object] = None
|
| 23 |
selected_entities: List[str]
|
|
@@ -82,7 +82,7 @@ class TransformersPipelinePIIGuardrail(Guardrail):
|
|
| 82 |
|
| 83 |
def _print_available_entities(self, entities: List[str]):
|
| 84 |
"""Print all available entity types that can be detected by the model."""
|
| 85 |
-
print("\nAvailable
|
| 86 |
print("=" * 25)
|
| 87 |
for entity in entities:
|
| 88 |
print(f"- {entity}")
|
|
@@ -92,23 +92,23 @@ class TransformersPipelinePIIGuardrail(Guardrail):
|
|
| 92 |
"""Print all available entity types that can be detected by the model."""
|
| 93 |
self._print_available_entities(self.available_entities)
|
| 94 |
|
| 95 |
-
def
|
| 96 |
-
"""Detect
|
| 97 |
results = self._pipeline(text)
|
| 98 |
|
| 99 |
# Group findings by entity type
|
| 100 |
-
|
| 101 |
for entity in results:
|
| 102 |
entity_type = entity['entity_group']
|
| 103 |
if entity_type in self.selected_entities:
|
| 104 |
-
if entity_type not in
|
| 105 |
-
|
| 106 |
-
|
| 107 |
|
| 108 |
-
return
|
| 109 |
|
| 110 |
def _anonymize_text(self, text: str, aggregate_redaction: bool = True) -> str:
|
| 111 |
-
"""Anonymize detected
|
| 112 |
results = self._pipeline(text)
|
| 113 |
|
| 114 |
# Sort entities by start position in reverse order to avoid offset issues
|
|
@@ -131,49 +131,49 @@ class TransformersPipelinePIIGuardrail(Guardrail):
|
|
| 131 |
return ' '.join(result.split())
|
| 132 |
|
| 133 |
@weave.op()
|
| 134 |
-
def guard(self, prompt: str, return_detected_types: bool = True, aggregate_redaction: bool = True) ->
|
| 135 |
-
"""Check if the input prompt contains any
|
| 136 |
|
| 137 |
Args:
|
| 138 |
prompt: The text to analyze
|
| 139 |
-
return_detected_types: If True, returns detailed
|
| 140 |
aggregate_redaction: If True, uses generic [redacted] instead of entity type
|
| 141 |
"""
|
| 142 |
-
# Detect
|
| 143 |
-
|
| 144 |
|
| 145 |
# Create explanation
|
| 146 |
explanation_parts = []
|
| 147 |
-
if
|
| 148 |
-
explanation_parts.append("Found the following
|
| 149 |
-
for
|
| 150 |
-
explanation_parts.append(f"- {
|
| 151 |
else:
|
| 152 |
-
explanation_parts.append("No
|
| 153 |
|
| 154 |
-
explanation_parts.append("\nChecked for these
|
| 155 |
for entity in self.selected_entities:
|
| 156 |
explanation_parts.append(f"- {entity}")
|
| 157 |
|
| 158 |
# Anonymize if requested
|
| 159 |
anonymized_text = None
|
| 160 |
-
if self.should_anonymize and
|
| 161 |
anonymized_text = self._anonymize_text(prompt, aggregate_redaction)
|
| 162 |
|
| 163 |
if return_detected_types:
|
| 164 |
-
return
|
| 165 |
-
|
| 166 |
-
|
| 167 |
explanation="\n".join(explanation_parts),
|
| 168 |
anonymized_text=anonymized_text
|
| 169 |
)
|
| 170 |
else:
|
| 171 |
-
return
|
| 172 |
-
|
| 173 |
explanation="\n".join(explanation_parts),
|
| 174 |
anonymized_text=anonymized_text
|
| 175 |
)
|
| 176 |
|
| 177 |
@weave.op()
|
| 178 |
-
def predict(self, prompt: str, return_detected_types: bool = True, aggregate_redaction: bool = True, **kwargs) ->
|
| 179 |
return self.guard(prompt, return_detected_types=return_detected_types, aggregate_redaction=aggregate_redaction, **kwargs)
|
|
|
|
| 5 |
from ..base import Guardrail
|
| 6 |
import weave
|
| 7 |
|
| 8 |
+
class TransformersEntityRecognitionResponse(BaseModel):
|
| 9 |
+
contains_entities: bool
|
| 10 |
+
detected_entities: Dict[str, List[str]]
|
| 11 |
explanation: str
|
| 12 |
anonymized_text: Optional[str] = None
|
| 13 |
|
| 14 |
+
class TransformersEntityRecognitionSimpleResponse(BaseModel):
|
| 15 |
+
contains_entities: bool
|
| 16 |
explanation: str
|
| 17 |
anonymized_text: Optional[str] = None
|
| 18 |
|
| 19 |
+
class TransformersEntityRecognitionGuardrail(Guardrail):
|
| 20 |
+
"""Generic guardrail for detecting entities using any token classification model."""
|
| 21 |
|
| 22 |
_pipeline: Optional[object] = None
|
| 23 |
selected_entities: List[str]
|
|
|
|
| 82 |
|
| 83 |
def _print_available_entities(self, entities: List[str]):
|
| 84 |
"""Print all available entity types that can be detected by the model."""
|
| 85 |
+
print("\nAvailable entity types:")
|
| 86 |
print("=" * 25)
|
| 87 |
for entity in entities:
|
| 88 |
print(f"- {entity}")
|
|
|
|
| 92 |
"""Print all available entity types that can be detected by the model."""
|
| 93 |
self._print_available_entities(self.available_entities)
|
| 94 |
|
| 95 |
+
def _detect_entities(self, text: str) -> Dict[str, List[str]]:
|
| 96 |
+
"""Detect entities in the text using the pipeline."""
|
| 97 |
results = self._pipeline(text)
|
| 98 |
|
| 99 |
# Group findings by entity type
|
| 100 |
+
detected_entities = {}
|
| 101 |
for entity in results:
|
| 102 |
entity_type = entity['entity_group']
|
| 103 |
if entity_type in self.selected_entities:
|
| 104 |
+
if entity_type not in detected_entities:
|
| 105 |
+
detected_entities[entity_type] = []
|
| 106 |
+
detected_entities[entity_type].append(entity['word'])
|
| 107 |
|
| 108 |
+
return detected_entities
|
| 109 |
|
| 110 |
def _anonymize_text(self, text: str, aggregate_redaction: bool = True) -> str:
|
| 111 |
+
"""Anonymize detected entities in text using the pipeline."""
|
| 112 |
results = self._pipeline(text)
|
| 113 |
|
| 114 |
# Sort entities by start position in reverse order to avoid offset issues
|
|
|
|
| 131 |
return ' '.join(result.split())
|
| 132 |
|
| 133 |
@weave.op()
|
| 134 |
+
def guard(self, prompt: str, return_detected_types: bool = True, aggregate_redaction: bool = True) -> TransformersEntityRecognitionResponse | TransformersEntityRecognitionSimpleResponse:
|
| 135 |
+
"""Check if the input prompt contains any entities using the transformer pipeline.
|
| 136 |
|
| 137 |
Args:
|
| 138 |
prompt: The text to analyze
|
| 139 |
+
return_detected_types: If True, returns detailed entity type information
|
| 140 |
aggregate_redaction: If True, uses generic [redacted] instead of entity type
|
| 141 |
"""
|
| 142 |
+
# Detect entities
|
| 143 |
+
detected_entities = self._detect_entities(prompt)
|
| 144 |
|
| 145 |
# Create explanation
|
| 146 |
explanation_parts = []
|
| 147 |
+
if detected_entities:
|
| 148 |
+
explanation_parts.append("Found the following entities in the text:")
|
| 149 |
+
for entity_type, instances in detected_entities.items():
|
| 150 |
+
explanation_parts.append(f"- {entity_type}: {len(instances)} instance(s)")
|
| 151 |
else:
|
| 152 |
+
explanation_parts.append("No entities detected in the text.")
|
| 153 |
|
| 154 |
+
explanation_parts.append("\nChecked for these entities:")
|
| 155 |
for entity in self.selected_entities:
|
| 156 |
explanation_parts.append(f"- {entity}")
|
| 157 |
|
| 158 |
# Anonymize if requested
|
| 159 |
anonymized_text = None
|
| 160 |
+
if self.should_anonymize and detected_entities:
|
| 161 |
anonymized_text = self._anonymize_text(prompt, aggregate_redaction)
|
| 162 |
|
| 163 |
if return_detected_types:
|
| 164 |
+
return TransformersEntityRecognitionResponse(
|
| 165 |
+
contains_entities=bool(detected_entities),
|
| 166 |
+
detected_entities=detected_entities,
|
| 167 |
explanation="\n".join(explanation_parts),
|
| 168 |
anonymized_text=anonymized_text
|
| 169 |
)
|
| 170 |
else:
|
| 171 |
+
return TransformersEntityRecognitionSimpleResponse(
|
| 172 |
+
contains_entities=bool(detected_entities),
|
| 173 |
explanation="\n".join(explanation_parts),
|
| 174 |
anonymized_text=anonymized_text
|
| 175 |
)
|
| 176 |
|
| 177 |
@weave.op()
|
| 178 |
+
def predict(self, prompt: str, return_detected_types: bool = True, aggregate_redaction: bool = True, **kwargs) -> TransformersEntityRecognitionResponse | TransformersEntityRecognitionSimpleResponse:
|
| 179 |
return self.guard(prompt, return_detected_types=return_detected_types, aggregate_redaction=aggregate_redaction, **kwargs)
|
guardrails_genie/guardrails/pii/run_presidio_model.py
DELETED
|
@@ -1,36 +0,0 @@
|
|
| 1 |
-
from guardrails_genie.guardrails.pii.presidio_pii_guardrail import PresidioPIIGuardrail
|
| 2 |
-
import weave
|
| 3 |
-
|
| 4 |
-
def run_presidio_model():
|
| 5 |
-
weave.init("guardrails-genie-pii-presidio-model")
|
| 6 |
-
|
| 7 |
-
# Create the guardrail with default entities and anonymization enabled
|
| 8 |
-
pii_guardrail = PresidioPIIGuardrail(
|
| 9 |
-
selected_entities=["PERSON", "EMAIL_ADDRESS", "PHONE_NUMBER"],
|
| 10 |
-
should_anonymize=True
|
| 11 |
-
)
|
| 12 |
-
|
| 13 |
-
# Check a prompt
|
| 14 |
-
prompt = "Please contact [email protected] or call 123-456-7890. My SSN is 123-45-6789"
|
| 15 |
-
result = pii_guardrail.guard(prompt)
|
| 16 |
-
print(result)
|
| 17 |
-
|
| 18 |
-
# Result will contain:
|
| 19 |
-
# - contains_pii: True
|
| 20 |
-
# - detected_pii_types: {
|
| 21 |
-
# "EMAIL_ADDRESS": ["[email protected]"],
|
| 22 |
-
# "PHONE_NUMBER": ["123-456-7890"],
|
| 23 |
-
# "US_SSN": ["123-45-6789"]
|
| 24 |
-
# }
|
| 25 |
-
# - safe_to_process: False
|
| 26 |
-
# - explanation: Detailed explanation of findings
|
| 27 |
-
# - anonymized_text: "Please contact <EMAIL_ADDRESS> or call <PHONE_NUMBER>. My SSN is <US_SSN>"
|
| 28 |
-
|
| 29 |
-
# Example with no PII
|
| 30 |
-
safe_prompt = "The weather is nice today"
|
| 31 |
-
safe_result = pii_guardrail.guard(safe_prompt)
|
| 32 |
-
print("\nSafe prompt result:")
|
| 33 |
-
print(safe_result)
|
| 34 |
-
|
| 35 |
-
if __name__ == "__main__":
|
| 36 |
-
run_presidio_model()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
guardrails_genie/guardrails/pii/run_regex_model.py
DELETED
|
@@ -1,21 +0,0 @@
|
|
| 1 |
-
from guardrails_genie.guardrails.pii.regex_pii_guardrail import RegexPIIGuardrail
|
| 2 |
-
import weave
|
| 3 |
-
|
| 4 |
-
def run_regex_model():
|
| 5 |
-
weave.init("guardrails-genie-pii-regex-model")
|
| 6 |
-
# Create the guardrail
|
| 7 |
-
pii_guardrail = RegexPIIGuardrail(use_defaults=True, should_anonymize=True)
|
| 8 |
-
|
| 9 |
-
# Check a prompt
|
| 10 |
-
prompt = "Please contact [email protected] or call 123-456-7890"
|
| 11 |
-
result = pii_guardrail.guard(prompt)
|
| 12 |
-
print(result)
|
| 13 |
-
|
| 14 |
-
# Result will contain:
|
| 15 |
-
# - contains_pii: True
|
| 16 |
-
# - detected_pii_types: {"email": ["[email protected]"], "phone_number": ["123-456-7890"]}
|
| 17 |
-
# - safe_to_process: False
|
| 18 |
-
# - explanation: Detailed explanation of findings
|
| 19 |
-
|
| 20 |
-
if __name__ == "__main__":
|
| 21 |
-
run_regex_model()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
guardrails_genie/guardrails/pii/run_transformers.py
DELETED
|
@@ -1,35 +0,0 @@
|
|
| 1 |
-
from guardrails_genie.guardrails.pii.transformers_pipeline_guardrail import TransformersPipelinePIIGuardrail
|
| 2 |
-
import weave
|
| 3 |
-
|
| 4 |
-
def run_transformers_pipeline():
|
| 5 |
-
weave.init("guardrails-genie-pii-transformers-pipeline-model")
|
| 6 |
-
|
| 7 |
-
# Create the guardrail with default entities and anonymization enabled
|
| 8 |
-
pii_guardrail = TransformersPipelinePIIGuardrail(
|
| 9 |
-
selected_entities=["GIVENNAME", "SURNAME", "EMAIL", "TELEPHONENUM", "SOCIALNUM", "PHONE_NUMBER"],
|
| 10 |
-
should_anonymize=True,
|
| 11 |
-
model_name="lakshyakh93/deberta_finetuned_pii",
|
| 12 |
-
show_available_entities=True
|
| 13 |
-
)
|
| 14 |
-
|
| 15 |
-
# Check a prompt
|
| 16 |
-
prompt = "Please contact John Smith at [email protected] or call 123-456-7890. My SSN is 123-45-6789"
|
| 17 |
-
result = pii_guardrail.guard(prompt, aggregate_redaction=False)
|
| 18 |
-
print(result)
|
| 19 |
-
|
| 20 |
-
# Result will contain:
|
| 21 |
-
# - contains_pii: True
|
| 22 |
-
# - detected_pii_types: {
|
| 23 |
-
# "GIVENNAME": ["John"],
|
| 24 |
-
# "SURNAME": ["Smith"],
|
| 25 |
-
# "EMAIL": ["[email protected]"],
|
| 26 |
-
# "TELEPHONENUM": ["123-456-7890"],
|
| 27 |
-
# "SOCIALNUM": ["123-45-6789"]
|
| 28 |
-
# }
|
| 29 |
-
# - safe_to_process: False
|
| 30 |
-
# - explanation: Detailed explanation of findings
|
| 31 |
-
# - anonymized_text: "Please contact [redacted] [redacted] at [redacted] or call [redacted]. My SSN is [redacted]"
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
if __name__ == "__main__":
|
| 35 |
-
run_transformers_pipeline()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|