* add AWS comprehend
- README.md +2 -1
- app.py +38 -3
- requirements.txt +7 -6
README.md
CHANGED
@@ -4,7 +4,7 @@ emoji: 📝
 colorFrom: yellow
 colorTo: gray
 sdk: gradio
-sdk_version: 4.
+sdk_version: 4.19.1
 pinned: true
 license: apache-2.0
 ---
@@ -35,3 +35,4 @@ gradio app.py
 - [Rebuff](https://rebuff.ai/)
 - [Azure Content Safety AI](https://learn.microsoft.com/en-us/azure/ai-services/content-safety/studio-quickstart)
 - [AWS Bedrock Guardrails](https://aws.amazon.com/bedrock/guardrails/) (coming soon)
+- [AWS Comprehend](https://docs.aws.amazon.com/comprehend/latest/dg/trust-safety.html)
app.py
CHANGED
@@ -11,6 +11,7 @@ from functools import lru_cache
 from typing import List, Union

 import aegis
+import boto3
 import gradio as gr
 import requests
 from huggingface_hub import HfApi
@@ -29,6 +30,7 @@ automorphic_api_key = os.getenv("AUTOMORPHIC_API_KEY")
 rebuff_api_key = os.getenv("REBUFF_API_KEY")
 azure_content_safety_endpoint = os.getenv("AZURE_CONTENT_SAFETY_ENDPOINT")
 azure_content_safety_key = os.getenv("AZURE_CONTENT_SAFETY_KEY")
+aws_comprehend_client = boto3.client(service_name="comprehend", region_name="us-east-1")


 @lru_cache(maxsize=2)
@@ -61,7 +63,9 @@ def convert_elapsed_time(diff_time) -> float:
 deepset_classifier = init_prompt_injection_model(
     "ProtectAI/deberta-v3-base-injection-onnx"
 ) # ONNX version of deepset/deberta-v3-base-injection
-protectai_classifier = init_prompt_injection_model(
+protectai_classifier = init_prompt_injection_model(
+    "ProtectAI/deberta-v3-base-prompt-injection", "onnx"
+)
 fmops_classifier = init_prompt_injection_model(
     "ProtectAI/fmops-distilbert-prompt-injection-onnx"
 ) # ONNX version of fmops/distilbert-prompt-injection
@@ -155,6 +159,36 @@ def detect_azure(prompt: str) -> (bool, bool):
     return False, False


+def detect_aws_comprehend(prompt: str) -> (bool, bool):
+    response = aws_comprehend_client.classify_document(
+        EndpointArn="arn:aws:comprehend:us-east-1:aws:document-classifier-endpoint/prompt-safety",
+        Text=prompt,
+    )
+    response = {
+        "Classes": [
+            {"Name": "SAFE_PROMPT", "Score": 0.9010000228881836},
+            {"Name": "UNSAFE_PROMPT", "Score": 0.0989999994635582},
+        ],
+        "ResponseMetadata": {
+            "RequestId": "e8900fe1-3346-45c0-bad3-007b2840865a",
+            "HTTPStatusCode": 200,
+            "HTTPHeaders": {
+                "x-amzn-requestid": "e8900fe1-3346-45c0-bad3-007b2840865a",
+                "content-type": "application/x-amz-json-1.1",
+                "content-length": "115",
+                "date": "Mon, 19 Feb 2024 08:34:43 GMT",
+            },
+            "RetryAttempts": 0,
+        },
+    }
+    logger.info(f"Prompt injection result from AWS Comprehend: {response}")
+    if response["ResponseMetadata"]["HTTPStatusCode"] != 200:
+        logger.error(f"Failed to call AWS Comprehend API: {response}")
+        return False, False
+
+    return True, response["Classes"][0] == "UNSAFE_PROMPT"
+
+
 detection_providers = {
     "ProtectAI (HF model)": detect_hf_protectai,
     "Deepset (HF model)": detect_hf_deepset,
@@ -163,6 +197,7 @@ detection_providers = {
     "Automorphic Aegis": detect_automorphic,
     # "Rebuff": detect_rebuff,
     "Azure Content Safety": detect_azure,
+    "AWS Comprehend": detect_aws_comprehend,
 }


@@ -235,8 +270,8 @@ if __name__ == "__main__":
 "The results are <strong>stored in the private dataset</strong> for further analysis and improvements. This interface is for research purposes only."
 "<br /><br />"
 "HuggingFace (HF) models are hosted on Spaces while other providers are called as APIs.<br /><br />"
-
-
+'<a href="https://join.slack.com/t/laiyerai/shared_invite/zt-28jv3ci39-sVxXrLs3rQdaN3mIl9IT~w">Join our Slack community to discuss LLM Security</a><br />'
+'<a href="https://github.com/protectai/llm-guard">Secure your LLM interactions with LLM Guard</a>',
 examples=[
 [
     example,
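Note on the committed `detect_aws_comprehend`: the live `classify_document` result is immediately overwritten by a hard-coded sample response (apparently leftover test data), and the final check compares a class dict to the string `"UNSAFE_PROMPT"`, so it can never flag an injection. The sketch below is a possible corrected version, not the committed code: it keeps the commit's client, endpoint ARN, and `(request_succeeded, is_injection)` return convention, while the 0.5 score threshold is an assumption.

```python
import logging

import boto3

logger = logging.getLogger(__name__)

# Assumed setup, mirroring the commit: Comprehend client in us-east-1 and the
# built-in prompt-safety classifier endpoint.
aws_comprehend_client = boto3.client(service_name="comprehend", region_name="us-east-1")
PROMPT_SAFETY_ENDPOINT_ARN = (
    "arn:aws:comprehend:us-east-1:aws:document-classifier-endpoint/prompt-safety"
)


def detect_aws_comprehend(prompt: str) -> (bool, bool):
    try:
        # Use the live response instead of a hard-coded sample.
        response = aws_comprehend_client.classify_document(
            EndpointArn=PROMPT_SAFETY_ENDPOINT_ARN,
            Text=prompt,
        )
    except Exception:
        logger.exception("Failed to call AWS Comprehend API")
        return False, False

    logger.info(f"Prompt injection result from AWS Comprehend: {response}")
    if response["ResponseMetadata"]["HTTPStatusCode"] != 200:
        logger.error(f"Failed to call AWS Comprehend API: {response}")
        return False, False

    # Each entry in "Classes" is a dict like {"Name": "UNSAFE_PROMPT", "Score": 0.09},
    # so look up the UNSAFE_PROMPT score instead of comparing a dict to a string.
    unsafe_score = next(
        (c["Score"] for c in response["Classes"] if c["Name"] == "UNSAFE_PROMPT"), 0.0
    )
    return True, unsafe_score > 0.5  # 0.5 threshold is an assumption
```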
requirements.txt
CHANGED
@@ -1,8 +1,9 @@
+boto3==1.34.44
 git+https://github.com/automorphic-ai/aegis.git
-gradio==4.
-huggingface_hub==0.
-onnxruntime==1.
-optimum[onnxruntime]==1.
-rebuff==0.
+gradio==4.19.1
+huggingface_hub==0.20.3
+onnxruntime==1.17.0
+optimum[onnxruntime]==1.17.1
+rebuff==0.1.1
 requests==2.31.0
-transformers==4.
+transformers==4.37.2
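The new `boto3` dependency also implies AWS credentials at runtime: the committed client is created with only a region, so it relies on boto3's default credential chain (on a Space, typically `AWS_ACCESS_KEY_ID` / `AWS_SECRET_ACCESS_KEY` set as secrets). A minimal sketch of an explicit variant, using the standard AWS environment variable names rather than anything defined in this repo:

```python
import os

import boto3

# Sketch: build the Comprehend client from standard AWS environment variables
# (e.g. configured as Space secrets); with unset values, boto3 should fall back
# to its default credential chain.
aws_comprehend_client = boto3.client(
    service_name="comprehend",
    region_name=os.getenv("AWS_REGION", "us-east-1"),
    aws_access_key_id=os.getenv("AWS_ACCESS_KEY_ID"),
    aws_secret_access_key=os.getenv("AWS_SECRET_ACCESS_KEY"),
)
```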