SSK-14 committed on
Commit
acb544e
·
1 Parent(s): 4cab2aa

Add LLM guard api

Browse files
Files changed (18) hide show
  1. Dockerfile +45 -0
  2. Dockerfile-cuda +53 -0
  3. Makefile +33 -0
  4. README.md +0 -2
  5. app/__init__.py +0 -0
  6. app/__main__.py +4 -0
  7. app/app.py +325 -0
  8. app/cache.py +145 -0
  9. app/config.py +93 -0
  10. app/otel.py +85 -0
  11. app/scanner.py +107 -0
  12. app/schemas.py +24 -0
  13. app/util.py +57 -0
  14. app/version.py +1 -0
  15. config/scanners.yml +162 -0
  16. docker-compose.yml +11 -0
  17. openapi.json +319 -0
  18. pyproject.toml +57 -0
Dockerfile ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Use the Python 3.11 slim image
FROM python:3.11-slim

LABEL org.opencontainers.image.source=https://github.com/protectai/llm-guard
LABEL org.opencontainers.image.description="LLM Guard API"
LABEL org.opencontainers.image.licenses=MIT

# Install system packages needed for building native wheels
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    && apt-get clean && rm -rf /var/lib/apt/lists/*

# Run as an unprivileged user; pip installs land in ~/.local (on PATH below)
RUN useradd -m -u 1000 user
USER user
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH

# ensures that the python output is sent straight to terminal (e.g. your container log)
# without being first buffered and that you can see the output of your application (e.g. django logs)
# in real time. Equivalent to python -u: https://docs.python.org/3/using/cmdline.html#cmdoption-u
# NOTE: key=value form; the legacy space-separated "ENV key value" form is deprecated.
ENV PYTHONUNBUFFERED=1

# https://docs.python.org/3/using/cmdline.html#envvar-PYTHONDONTWRITEBYTECODE
# Prevents Python from writing .pyc files to disk
ENV PYTHONDONTWRITEBYTECODE=1

# Set up a working directory
WORKDIR $HOME/app

# Copy pyproject.toml and other necessary files for installation
COPY --chown=user:user pyproject.toml ./
COPY --chown=user:user app ./app

# Install the project's dependencies (CPU-only torch wheel from the PyTorch index);
# --no-cache-dir on every pip call keeps the image layer small.
RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir torch==2.0.1 --index-url https://download.pytorch.org/whl/cpu && \
    pip install --no-cache-dir ".[cpu]"

RUN python -m spacy download en_core_web_sm

COPY --chown=user:user ./config/scanners.yml ./config/scanners.yml

EXPOSE 7860

CMD ["llm_guard_api", "/home/user/app/config/scanners.yml"]
Dockerfile-cuda ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Start from an NVIDIA CUDA base image with Python 3
FROM nvidia/cuda:11.8.0-cudnn8-runtime-ubuntu22.04

LABEL org.opencontainers.image.source=https://github.com/protectai/llm-guard
LABEL org.opencontainers.image.description="LLM Guard API"
LABEL org.opencontainers.image.licenses=MIT

# Install Python and other necessary packages
RUN apt-get update && apt-get install -y \
    python3-pip \
    python3-dev \
    build-essential \
    && apt-get clean && rm -rf /var/lib/apt/lists/*

# Alias python3 to python (spacy invocation below uses `python`)
RUN ln -s /usr/bin/python3 /usr/bin/python

# Create a non-root user and set user environment variables
RUN useradd -m -u 1000 user
USER user
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH

# ensures that the python output is sent straight to terminal (e.g. your container log)
# without being first buffered and that you can see the output of your application (e.g. django logs)
# in real time. Equivalent to python -u: https://docs.python.org/3/using/cmdline.html#cmdoption-u
# NOTE: key=value form; the legacy space-separated "ENV key value" form is deprecated.
ENV PYTHONUNBUFFERED=1

# https://docs.python.org/3/using/cmdline.html#envvar-PYTHONDONTWRITEBYTECODE
# Prevents Python from writing .pyc files to disk
ENV PYTHONDONTWRITEBYTECODE=1

# Set up a working directory
WORKDIR $HOME/app

# Copy pyproject.toml and other necessary files for installation
COPY --chown=user:user pyproject.toml ./
COPY --chown=user:user app ./app

# Install the project's dependencies (CUDA 11.8 torch wheel)
RUN pip3 install --no-cache-dir --upgrade pip && \
    pip3 install --no-cache-dir torch==2.0.1 --extra-index-url https://download.pytorch.org/whl/cu118 && \
    pip3 install --no-cache-dir ".[gpu]"

RUN python -m spacy download en_core_web_sm

COPY --chown=user:user ./config/scanners.yml ./config/scanners.yml

# Expose the port the app runs on
EXPOSE 7860

# Specify the default command
CMD ["llm_guard_api", "/home/user/app/config/scanners.yml"]
Makefile ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
### --------------------------------------------------------------------------------------------------------------------
### Variables
### --------------------------------------------------------------------------------------------------------------------

# Docker config (simple `:=` expansion: evaluated once at parse time, not on every use)
DOCKER_IMAGE_NAME := laiyer/llm-guard-api
VERSION := 0.3.10

# Terminal color escape codes for log output
NO_COLOR := \033[0m
OK_COLOR := \033[32;01m
ERROR_COLOR := \033[31;01m
WARN_COLOR := \033[33;01m

install:  ## Install the API locally with CPU extras
	@python -m pip install ".[cpu]"

build-docker-multi:  ## Build and push the multi-arch CPU image
	@docker buildx build --platform linux/amd64,linux/arm64 -t $(DOCKER_IMAGE_NAME):$(VERSION) -t $(DOCKER_IMAGE_NAME):latest . --push

build-docker-cuda-multi:  ## Build and push the CUDA image (amd64 only)
	@docker buildx build --platform linux/amd64 -t $(DOCKER_IMAGE_NAME):$(VERSION)-cuda -t $(DOCKER_IMAGE_NAME):latest-cuda -f Dockerfile-cuda . --push

run: install  ## Install, then run the API against the local config
	llm_guard_api ./config/scanners.yml

run-docker:  ## Run the CPU image with the local config mounted
	@docker run -p 7860:7860 -e DEBUG='true' -v ./config:/home/user/app/config $(DOCKER_IMAGE_NAME):$(VERSION)

run-docker-cuda:  ## Run the CUDA image with all GPUs
	@docker run --gpus all -p 7860:7860 -e DEBUG='true' -v ./config:/home/user/app/config $(DOCKER_IMAGE_NAME):$(VERSION)-cuda

.PHONY: install run build-docker-multi build-docker-cuda-multi run-docker run-docker-cuda
README.md CHANGED
@@ -7,5 +7,3 @@ sdk: docker
7
  pinned: false
8
  license: mit
9
  ---
10
-
11
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
7
  pinned: false
8
  license: mit
9
  ---
 
 
app/__init__.py ADDED
File without changes
app/__main__.py ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
from app import run_app

# Allow `python -m app <config>` to start the API server; the config path is
# consumed by app.app's module-level argparse.
if __name__ == "__main__":
    run_app()
app/app.py ADDED
@@ -0,0 +1,325 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import argparse
import asyncio
import concurrent.futures
import time
from typing import Annotated

import structlog
from fastapi import Depends, FastAPI, HTTPException, Response, status
from fastapi.encoders import jsonable_encoder
from fastapi.exceptions import RequestValidationError
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
from fastapi.security import (
    HTTPAuthorizationCredentials,
    HTTPBasic,
    HTTPBasicCredentials,
    HTTPBearer,
)
from opentelemetry import metrics
from prometheus_client import CONTENT_TYPE_LATEST, REGISTRY, generate_latest
from slowapi import Limiter, _rate_limit_exceeded_handler
from slowapi.errors import RateLimitExceeded
from slowapi.middleware import SlowAPIMiddleware
from slowapi.util import get_remote_address
from starlette.exceptions import HTTPException as StarletteHTTPException

from llm_guard import scan_output, scan_prompt
from llm_guard.vault import Vault

from .cache import InMemoryCache
from .config import AuthConfig, get_config
from .otel import configure_otel, instrument_app
from .scanner import get_input_scanners, get_output_scanners
from .schemas import (
    AnalyzeOutputRequest,
    AnalyzeOutputResponse,
    AnalyzePromptRequest,
    AnalyzePromptResponse,
)
from .util import configure_logger
from .version import __version__

# Shared vault: Anonymize (input) and Deanonymize (output) scanners exchange
# redacted values through it.
vault = Vault()

# The config path comes from the command line, parsed at import time.
# NOTE(review): argparse runs on module import, so importing this module without
# the positional argument exits the process — confirm this is intentional.
parser = argparse.ArgumentParser(description="LLM Guard API")
parser.add_argument("config", type=str, help="Path to the configuration file")
args = parser.parse_args()
scanners_config_file = args.config

config = get_config(scanners_config_file)

LOGGER = structlog.getLogger(__name__)
log_level = config.app.log_level
is_debug = log_level == "DEBUG"
configure_logger(log_level)

# Telemetry must be configured before scanners/routes create meters below.
configure_otel(config.app.name, config.tracing, config.metrics)

# Scanners are instantiated once at startup (model loading is expensive).
input_scanners = get_input_scanners(config.input_scanners, vault)
output_scanners = get_output_scanners(config.output_scanners, vault)


# Counter incremented once per scanner verdict on every analyze request.
meter = metrics.get_meter_provider().get_meter(__name__)
scanners_valid_counter = meter.create_counter(
    name="scanners.valid",
    unit="1",
    description="measures the number of valid scanners",
)
+
70
+
71
def create_app() -> FastAPI:
    """
    Build the FastAPI application: prompt cache, app metadata, and all routes.

    Returns:
        The fully configured FastAPI instance.
    """
    # Cache for /analyze/prompt responses, sized/expired from config.
    cache = InMemoryCache(
        max_size=config.cache.max_size,
        expiration_time=config.cache.ttl,
    )

    if config.app.scan_fail_fast:
        LOGGER.debug("Scan fail_fast mode is enabled")

    app = FastAPI(
        title=config.app.name,
        description="API to run LLM Guard scanners.",
        debug=is_debug,
        version=__version__,
        openapi_url="/openapi.json" if is_debug else None,  # hide docs in production
    )

    register_routes(app, cache, input_scanners, output_scanners)

    return app
+
92
+
93
def _check_auth_function(auth_config: AuthConfig) -> callable:
    """
    Build the FastAPI dependency enforcing the configured auth scheme.

    Args:
        auth_config: Auth settings; falsy disables authentication entirely.

    Returns:
        An async dependency that returns True on success and raises HTTP 401
        on invalid credentials.

    Raises:
        ValueError: If the configured auth type is not supported.
    """
    async def check_auth_noop() -> bool:
        # No auth configured: every request passes.
        return True

    if not auth_config:
        return check_auth_noop

    # The credential-extraction scheme is chosen once here so FastAPI can emit
    # the matching OpenAPI security requirement for the dependency.
    if auth_config.type == "http_bearer":
        credentials_type = Annotated[HTTPAuthorizationCredentials, Depends(HTTPBearer())]
    elif auth_config.type == "http_basic":
        credentials_type = Annotated[HTTPBasicCredentials, Depends(HTTPBasic())]
    else:
        raise ValueError(f"Invalid auth type: {auth_config.type}")

    async def check_auth(credentials: credentials_type) -> bool:
        if auth_config.type == "http_bearer":
            if credentials.credentials != auth_config.token:
                raise HTTPException(
                    status_code=status.HTTP_401_UNAUTHORIZED, detail="Invalid API key"
                )
        elif auth_config.type == "http_basic":
            # NOTE(review): plain != comparison is not constant-time; consider
            # secrets.compare_digest to avoid timing side channels.
            if (
                credentials.username != auth_config.username
                or credentials.password != auth_config.password
            ):
                raise HTTPException(
                    status_code=status.HTTP_401_UNAUTHORIZED, detail="Invalid Username or Password"
                )

        return True

    return check_auth
+
126
+
127
def register_routes(
    app: FastAPI, cache: InMemoryCache, input_scanners: list, output_scanners: list
):
    """
    Attach middleware, exception handlers and all HTTP routes to the app.

    Args:
        app: The FastAPI instance to configure.
        cache: Prompt-level response cache used by /analyze/prompt.
        input_scanners: Instantiated prompt scanners.
        output_scanners: Instantiated output scanners.
    """
    # NOTE(review): allow_origins=["*"] together with allow_credentials=True is
    # rejected by browsers per the CORS spec — confirm the intended origin policy.
    app.add_middleware(
        CORSMiddleware,
        allow_origins=["*"],
        allow_credentials=True,
        allow_methods=["*"],
        allow_headers=["Authorization", "Content-Type"],
    )

    # Rate limiting keyed by client IP; the registered handler returns HTTP 429.
    limiter = Limiter(key_func=get_remote_address, default_limits=[config.rate_limit.limit])
    app.state.limiter = limiter
    app.add_exception_handler(RateLimitExceeded, _rate_limit_exceeded_handler)
    if bool(config.rate_limit.enabled):
        app.add_middleware(SlowAPIMiddleware)

    # Auth dependency built once from config (no-op when auth is disabled).
    check_auth = _check_auth_function(config.auth)

    @app.get("/", tags=["Main"])
    @limiter.exempt
    async def read_root():
        # Unauthenticated landing route.
        return {"name": "LLM Guard API"}

    @app.get("/healthz", tags=["Health"])
    @limiter.exempt
    async def healthcheck():
        # Liveness probe.
        return JSONResponse({"status": "alive"})

    @app.get("/readyz", tags=["Health"])
    @limiter.exempt
    async def liveliness():
        # Readiness probe.
        return JSONResponse({"status": "ready"})

    @app.post(
        "/analyze/output",
        tags=["Analyze"],
        response_model=AnalyzeOutputResponse,
        status_code=status.HTTP_200_OK,
        description="Analyze an output and return the sanitized output and the results of the scanners",
    )
    async def analyze_output(
        request: AnalyzeOutputRequest, _: Annotated[bool, Depends(check_auth)]
    ) -> AnalyzeOutputResponse:
        LOGGER.debug("Received analyze output request", request=request)

        # Scanners are CPU/model-bound: run them in a worker thread so the
        # event loop stays responsive, and bound the total time.
        with concurrent.futures.ThreadPoolExecutor() as executor:
            loop = asyncio.get_event_loop()
            try:
                start_time = time.time()
                sanitized_output, results_valid, results_score = await asyncio.wait_for(
                    loop.run_in_executor(
                        executor,
                        scan_output,
                        output_scanners,
                        request.prompt,
                        request.output,
                        config.app.scan_fail_fast,
                    ),
                    timeout=config.app.scan_output_timeout,
                )

                # One metric point per scanner verdict.
                for scanner, valid in results_valid.items():
                    scanners_valid_counter.add(
                        1, {"source": "output", "valid": valid, "scanner": scanner}
                    )

                response = AnalyzeOutputResponse(
                    sanitized_output=sanitized_output,
                    is_valid=all(results_valid.values()),
                    scanners=results_score,
                )
                elapsed_time = time.time() - start_time
                LOGGER.debug(
                    "Sanitized response",
                    scores=results_score,
                    elapsed_time_seconds=round(elapsed_time, 6),
                )
            except asyncio.TimeoutError:
                raise HTTPException(
                    status_code=status.HTTP_408_REQUEST_TIMEOUT, detail="Request timeout."
                )

        return response

    @app.post(
        "/analyze/prompt",
        tags=["Analyze"],
        response_model=AnalyzePromptResponse,
        status_code=status.HTTP_200_OK,
        description="Analyze a prompt and return the sanitized prompt and the results of the scanners",
    )
    async def analyze_prompt(
        request: AnalyzePromptRequest,
        _: Annotated[bool, Depends(check_auth)],
        response: Response,
    ) -> AnalyzePromptResponse:
        LOGGER.debug("Received analyze prompt request", request=request)

        # Identical prompts within the cache TTL are served from memory.
        cached_result = cache.get(request.prompt)
        if cached_result:
            LOGGER.debug("Response was found in cache")

            response.headers["X-Cache-Hit"] = "true"

            return AnalyzePromptResponse(**cached_result)

        response.headers["X-Cache-Hit"] = "false"

        # Same worker-thread + timeout pattern as analyze_output above.
        with concurrent.futures.ThreadPoolExecutor() as executor:
            loop = asyncio.get_event_loop()
            try:
                start_time = time.time()
                sanitized_prompt, results_valid, results_score = await asyncio.wait_for(
                    loop.run_in_executor(
                        executor,
                        scan_prompt,
                        input_scanners,
                        request.prompt,
                        config.app.scan_fail_fast,
                    ),
                    timeout=config.app.scan_prompt_timeout,
                )

                for scanner, valid in results_valid.items():
                    scanners_valid_counter.add(
                        1, {"source": "input", "valid": valid, "scanner": scanner}
                    )

                # NOTE: `response` now shadows the Response parameter above;
                # the X-Cache-Hit header was already set on the real response.
                response = AnalyzePromptResponse(
                    sanitized_prompt=sanitized_prompt,
                    is_valid=all(results_valid.values()),
                    scanners=results_score,
                )
                cache.set(request.prompt, response.dict())

                elapsed_time = time.time() - start_time
                LOGGER.debug(
                    "Sanitized prompt response returned",
                    scores=results_score,
                    elapsed_time_seconds=round(elapsed_time, 6),
                )
            except asyncio.TimeoutError:
                raise HTTPException(
                    status_code=status.HTTP_408_REQUEST_TIMEOUT, detail="Request timeout."
                )

        return response

    # Prometheus scrape endpoint, only when the prometheus exporter is active.
    if config.metrics and config.metrics.exporter == "prometheus":

        @app.get("/metrics", tags=["Metrics"])
        @limiter.exempt
        async def metrics():
            return Response(
                content=generate_latest(REGISTRY), headers={"Content-Type": CONTENT_TYPE_LATEST}
            )

    @app.on_event("shutdown")
    async def shutdown_event():
        LOGGER.info("Shutting down app...")

    @app.exception_handler(StarletteHTTPException)
    async def http_exception_handler(request, exc):
        # Normalize HTTP errors into {"message", "details"} JSON bodies.
        LOGGER.warning(
            "HTTP exception", exception_status_code=exc.status_code, exception_detail=exc.detail
        )

        return JSONResponse(
            {"message": str(exc.detail), "details": None}, status_code=exc.status_code
        )

    @app.exception_handler(RequestValidationError)
    async def validation_exception_handler(request, exc):
        # Body/query validation failures become 422 with field-level details.
        LOGGER.warning("Invalid request", exception=str(exc))

        response = {"message": "Validation failed", "details": exc.errors()}
        return JSONResponse(
            jsonable_encoder(response), status_code=status.HTTP_422_UNPROCESSABLE_ENTITY
        )
+
308
+
309
# The app is created at import time so ASGI servers can target `app.app:app`.
app = create_app()
instrument_app(app)


def run_app():
    """Run the API under uvicorn (entry point used by __main__ and the CLI script)."""
    import uvicorn

    uvicorn.run(
        app,
        host="0.0.0.0",  # listen on all interfaces (container deployment)
        port=config.app.port,
        server_header=False,  # don't advertise the server software
        log_level=log_level.lower(),
        proxy_headers=True,  # honor X-Forwarded-* from a reverse proxy
        forwarded_allow_ips="*",
        timeout_keep_alive=2,
    )
app/cache.py ADDED
@@ -0,0 +1,145 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import threading
import time
from collections import OrderedDict
from typing import Optional


class InMemoryCache:
    """
    A simple thread-safe in-memory cache using an OrderedDict.

    This cache supports setting a maximum size and expiration time for cached items.
    When the cache is full, it uses a Least Recently Used (LRU) eviction policy.

    Attributes:
        max_size (int, optional): Maximum number of items to store in the cache.
            None means unbounded.
        expiration_time (int, optional): Time in seconds after which a cached item
            expires. Default is 1 hour; None disables expiration.

    Example:

        cache = InMemoryCache(max_size=3, expiration_time=5)

        # setting cache values
        cache.set("a", 1)
        cache.set("b", 2)
        cache["c"] = 3

        # getting cache values
        a = cache.get("a")
        b = cache["b"]
    """

    def __init__(self, max_size: Optional[int] = None, expiration_time: Optional[int] = 60 * 60):
        """
        Initialize a new InMemoryCache instance.

        Args:
            max_size (int, optional): Maximum number of items to store in the cache.
            expiration_time (int, optional): Time in seconds after which a cached item expires. Default is 1 hour.
        """
        self._cache = OrderedDict()
        # RLock instead of Lock: get_or_set()/set() call other public methods
        # while already holding the lock; a non-reentrant Lock deadlocks on the
        # second acquire (previous behavior of get_or_set()).
        self._lock = threading.RLock()
        self.max_size = max_size
        self.expiration_time = expiration_time

    def get(self, key):
        """
        Retrieve an item from the cache.

        Args:
            key: The key of the item to retrieve.

        Returns:
            The value associated with the key, or None if the key is not found
            or the item has expired.
        """
        with self._lock:
            if key in self._cache:
                item = self._cache.pop(key)
                if (
                    self.expiration_time is None
                    or time.time() - item["time"] < self.expiration_time
                ):
                    # Re-insert at the end to mark the key recently used.
                    self._cache[key] = item
                    return item["value"]
                else:
                    # Expired: drop the entry.
                    self.delete(key)
            return None

    def set(self, key, value):
        """
        Add an item to the cache.

        If the cache is full, the least recently used item is evicted.

        Args:
            key: The key of the item.
            value: The value to cache.
        """
        with self._lock:
            if key in self._cache:
                # Remove existing key before re-inserting to update order.
                self.delete(key)
            elif self.max_size and len(self._cache) >= self.max_size:
                # Remove least recently used item.
                self._cache.popitem(last=False)
            self._cache[key] = {"value": value, "time": time.time()}

    def get_or_set(self, key, value):
        """
        Retrieve an item from the cache. If the item does not exist, set it
        with the provided value.

        Args:
            key: The key of the item.
            value: The value to cache if the item doesn't exist.

        Returns:
            The cached value associated with the key.
        """
        # Safe with the RLock; the nested get()/set() calls below deadlocked
        # under the previous non-reentrant Lock.
        with self._lock:
            if key in self._cache:
                return self.get(key)
            self.set(key, value)
            return value

    def delete(self, key):
        """
        Remove an item from the cache (no-op if the key is absent).

        Args:
            key: The key of the item to remove.
        """
        # Lock restored: safe now that callers holding the lock use an RLock.
        with self._lock:
            self._cache.pop(key, None)

    def clear(self):
        """
        Clear all items from the cache.
        """
        with self._lock:
            self._cache.clear()

    def __contains__(self, key):
        """Check if the key is in the cache (ignores expiration)."""
        return key in self._cache

    def __getitem__(self, key):
        """Retrieve an item from the cache using the square bracket notation."""
        return self.get(key)

    def __setitem__(self, key, value):
        """Add an item to the cache using the square bracket notation."""
        self.set(key, value)

    def __delitem__(self, key):
        """Remove an item from the cache using the square bracket notation."""
        self.delete(key)

    def __len__(self):
        """Return the number of items in the cache."""
        return len(self._cache)

    def __repr__(self):
        """Return a string representation of the InMemoryCache instance."""
        return f"InMemoryCache(max_size={self.max_size}, expiration_time={self.expiration_time})"
app/config.py ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
import re
from typing import Any, Dict, List, Literal, Optional

import structlog
import yaml
from pydantic import BaseModel, Field

LOGGER = structlog.getLogger(__name__)

# Matches ${VAR} or ${VAR:default} placeholders inside a scalar value.
_var_matcher = re.compile(r"\${([^}^{]+)}")
# Matches any scalar containing at least one ${...} placeholder; used as the
# implicit-resolver trigger for the !envvar tag in load_yaml().
_tag_matcher = re.compile(r"[^$]*\${([^}^{]+)}.*")


class RateLimitConfig(BaseModel):
    """Request rate-limiting settings."""

    enabled: bool = Field(default=False)
    # slowapi limit string, e.g. "100/minute".
    limit: str = Field(default="100/minute")


class CacheConfig(BaseModel):
    """Prompt-response cache settings."""

    # Item lifetime in seconds.
    ttl: int = Field(default=60)
    # Maximum cached items; None means unbounded.
    max_size: Optional[int] = Field(default=None)


class AuthConfig(BaseModel):
    """API authentication settings."""

    type: Literal["http_bearer", "http_basic"] = Field()
    # Bearer token (http_bearer only).
    token: Optional[str] = Field(default=None)
    # Credentials (http_basic only).
    username: Optional[str] = Field(default=None)
    password: Optional[str] = Field(default=None)


class TracingConfig(BaseModel):
    """OpenTelemetry tracing settings."""

    exporter: Literal["otel_http", "console"] = Field(default="console")
    # Collector endpoint (otel_http only).
    endpoint: Optional[str] = Field(default=None)


class MetricsConfig(BaseModel):
    """OpenTelemetry metrics settings."""

    exporter: Literal["otel_http", "prometheus", "console"] = Field(default="console")
    # Collector endpoint (otel_http only).
    endpoint: Optional[str] = Field(default=None)


class AppConfig(BaseModel):
    """General application settings."""

    name: Optional[str] = Field(default="LLM Guard API")
    port: Optional[int] = Field(default=7860)
    log_level: Optional[str] = Field(default="INFO")
    # Stop at the first failing scanner instead of running all of them.
    scan_fail_fast: Optional[bool] = Field(default=False)
    # Per-request scan timeouts, in seconds.
    scan_prompt_timeout: Optional[int] = Field(default=10)
    scan_output_timeout: Optional[int] = Field(default=30)


class ScannerConfig(BaseModel):
    """A single scanner entry: its type name and constructor parameters."""

    type: str
    params: Optional[Dict] = Field(default_factory=dict)


class Config(BaseModel):
    """Top-level configuration loaded from the scanners YAML file."""

    input_scanners: List[ScannerConfig] = Field()
    output_scanners: List[ScannerConfig] = Field()
    rate_limit: RateLimitConfig = Field(default_factory=RateLimitConfig)
    cache: CacheConfig = Field(default_factory=CacheConfig)
    auth: Optional[AuthConfig] = Field(default=None)
    app: AppConfig = Field(default_factory=AppConfig)
    tracing: Optional[TracingConfig] = Field(default=None)
    metrics: Optional[MetricsConfig] = Field(default=None)
65
+
66
+
67
def _path_constructor(_loader: Any, node: Any):
    """Expand ${VAR} / ${VAR:default} placeholders in a YAML scalar from the environment."""

    def _expand(match):
        # "NAME:default" -> name, default; the appended ":" guarantees a
        # default slot (empty string) even when none was written.
        parts = f"{match.group(1)}:".split(":")
        return os.environ.get(parts[0], parts[1])

    return _var_matcher.sub(_expand, node.value)
73
+
74
+
75
def load_yaml(filename: str) -> dict:
    """Load a YAML file with ${ENV_VAR} expansion; returns an empty dict on failure."""
    # Route any scalar containing ${...} through _path_constructor via the
    # implicit !envvar tag on the SafeLoader.
    yaml.add_implicit_resolver("!envvar", _tag_matcher, None, yaml.SafeLoader)
    yaml.add_constructor("!envvar", _path_constructor, yaml.SafeLoader)
    try:
        with open(filename, "r") as stream:
            raw = stream.read()
        return yaml.safe_load(raw)
    except (FileNotFoundError, PermissionError, yaml.YAMLError) as exc:
        LOGGER.error("Error loading YAML file", exception=exc)
        return dict()
84
+
85
+
86
def get_config(file_name: str) -> Optional[Config]:
    """Parse the scanners YAML file into a Config model; None if it could not be read."""
    LOGGER.debug("Loading config file", file_name=file_name)

    raw = load_yaml(file_name)
    # load_yaml() signals failure with an empty dict.
    if raw == {}:
        return None

    return Config(**raw)
app/otel.py ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI
2
+ from opentelemetry import metrics, propagate, trace
3
+ from opentelemetry.exporter.otlp.proto.http.metric_exporter import OTLPMetricExporter
4
+ from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
5
+ from opentelemetry.exporter.prometheus import PrometheusMetricReader
6
+ from opentelemetry.instrumentation.fastapi import FastAPIInstrumentor
7
+ from opentelemetry.propagators.aws import AwsXRayPropagator
8
+ from opentelemetry.sdk.extension.aws.resource.ec2 import AwsEc2ResourceDetector
9
+ from opentelemetry.sdk.extension.aws.trace import AwsXRayIdGenerator
10
+ from opentelemetry.sdk.metrics import MeterProvider
11
+ from opentelemetry.sdk.metrics.export import ConsoleMetricExporter, PeriodicExportingMetricReader
12
+ from opentelemetry.sdk.resources import (
13
+ SERVICE_NAME,
14
+ SERVICE_VERSION,
15
+ Resource,
16
+ get_aggregated_resources,
17
+ )
18
+ from opentelemetry.sdk.trace import TracerProvider
19
+ from opentelemetry.sdk.trace.export import BatchSpanProcessor, ConsoleSpanExporter
20
+
21
+ from .config import MetricsConfig, TracingConfig
22
+ from .version import __version__
23
+
24
+
25
def _configure_tracing(tracing_config: TracingConfig, resource: Resource) -> None:
    """
    Configure the global OpenTelemetry tracer provider.

    Args:
        tracing_config: Tracing settings (exporter type and optional endpoint),
            or None to leave tracing unconfigured.
        resource: Base resource attributes (service name/version) for spans.

    Raises:
        ValueError: If the exporter name is not recognized.
    """
    if tracing_config is None:
        return

    if tracing_config.exporter == "xray":
        # AWS X-Ray: propagate trace context in X-Ray format and enrich the
        # resource with EC2 metadata.
        # NOTE(review): "xray" is not in TracingConfig's Literal exporters, so
        # this branch is unreachable via validated config — confirm intent.
        propagate.set_global_textmap(AwsXRayPropagator())
        resource = resource.merge(
            get_aggregated_resources(
                [AwsEc2ResourceDetector()],
            )
        )

    tracer_provider = TracerProvider(resource=resource)
    if tracing_config.exporter == "xray":
        tracer_provider.id_generator = AwsXRayIdGenerator()
        exporter = OTLPSpanExporter(endpoint=tracing_config.endpoint)
    elif tracing_config.exporter == "otel_http":
        exporter = OTLPSpanExporter(endpoint=tracing_config.endpoint)
    elif tracing_config.exporter == "console":
        exporter = ConsoleSpanExporter()
    else:
        # Fail loudly instead of hitting UnboundLocalError on `exporter` below.
        raise ValueError(f"Unknown tracing exporter: {tracing_config.exporter}")

    tracer_provider.add_span_processor(BatchSpanProcessor(exporter))
    trace.set_tracer_provider(tracer_provider)
+
49
+
50
def _configure_metrics(metrics_config: MetricsConfig, resource: Resource) -> None:
    """
    Configure the global OpenTelemetry meter provider.

    Args:
        metrics_config: Metrics settings (exporter type and optional endpoint),
            or None to leave metrics unconfigured.
        resource: Base resource attributes (service name/version) for metrics.

    Raises:
        ValueError: If the exporter name is not recognized.
    """
    if metrics_config is None:
        return

    if metrics_config.exporter == "console":
        reader = PeriodicExportingMetricReader(ConsoleMetricExporter())
    elif metrics_config.exporter == "otel_http":
        reader = PeriodicExportingMetricReader(OTLPMetricExporter(endpoint=metrics_config.endpoint))
    elif metrics_config.exporter == "prometheus":
        # Scraped via the /metrics route registered in app.py.
        reader = PrometheusMetricReader()
    else:
        # Fail loudly instead of hitting UnboundLocalError on `reader` below.
        raise ValueError(f"Unknown metrics exporter: {metrics_config.exporter}")

    meter_provider = MeterProvider(resource=resource, metric_readers=[reader])
    metrics.set_meter_provider(meter_provider)
+
64
+
65
def configure_otel(
    app_name: str, tracing_config: TracingConfig, metrics_config: MetricsConfig
) -> None:
    """
    Configure OpenTelemetry tracing and metrics for the service.

    Args:
        app_name: Service name attached to all telemetry.
        tracing_config: Tracing settings, or None to skip tracing setup.
        metrics_config: Metrics settings, or None to skip metrics setup.
    """
    # Shared resource identifying the service on every span and metric.
    resource = Resource(
        attributes={
            SERVICE_NAME: app_name,
            SERVICE_VERSION: __version__,
        }
    )

    _configure_tracing(tracing_config, resource)
    _configure_metrics(metrics_config, resource)
+
78
+
79
def instrument_app(app: FastAPI) -> None:
    """
    Auto-instrument the FastAPI app with OpenTelemetry request tracing/metrics.

    Health and metrics endpoints are excluded to keep telemetry signal clean.

    Args:
        app: The FastAPI instance to instrument.
    """
    FastAPIInstrumentor.instrument_app(
        app,
        excluded_urls="healthz,readyz,metrics",
        meter_provider=metrics.get_meter_provider(),
        tracer_provider=trace.get_tracer_provider(),
    )
app/scanner.py ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Dict, List, Optional
2
+
3
+ import structlog
4
+
5
+ from llm_guard import input_scanners, output_scanners
6
+ from llm_guard.input_scanners.base import Scanner as InputScanner
7
+ from llm_guard.output_scanners.base import Scanner as OutputScanner
8
+ from llm_guard.vault import Vault
9
+
10
+ from .config import ScannerConfig
11
+ from .util import get_resource_utilization
12
+
13
+ LOGGER = structlog.getLogger(__name__)
14
+
15
+
16
def get_input_scanners(scanners: List[ScannerConfig], vault: Vault) -> List[InputScanner]:
    """
    Instantiate every input scanner listed in the configuration file.
    """

    loaded: List[InputScanner] = []
    for scanner_conf in scanners:
        LOGGER.debug(
            "Loading input scanner", scanner=scanner_conf.type, **get_resource_utilization()
        )
        instance = _get_input_scanner(
            scanner_conf.type,
            scanner_conf.params,
            vault=vault,
        )
        loaded.append(instance)

    return loaded
33
+
34
+
35
def get_output_scanners(scanners: List[ScannerConfig], vault: Vault) -> List[OutputScanner]:
    """
    Instantiate every output scanner listed in the configuration file.
    """
    loaded: List[OutputScanner] = []
    for scanner_conf in scanners:
        LOGGER.debug(
            "Loading output scanner", scanner=scanner_conf.type, **get_resource_utilization()
        )
        instance = _get_output_scanner(
            scanner_conf.type,
            scanner_conf.params,
            vault=vault,
        )
        loaded.append(instance)

    return loaded
51
+
52
+
53
def _get_input_scanner(
    scanner_name: str,
    scanner_config: Optional[Dict],
    *,
    vault: Vault,
):
    """
    Instantiate a single input scanner by name.

    Args:
        scanner_name: Registered name of the input scanner (e.g. "Anonymize").
        scanner_config: Scanner parameters from the config file; may be None.
        vault: Shared vault, injected into scanners that need it.

    Returns:
        The instantiated input scanner.
    """
    # Copy so the caller's params dict (held inside the parsed config object)
    # is not mutated with injected keys like "vault"/"use_onnx".
    scanner_config = dict(scanner_config) if scanner_config else {}

    if scanner_name == "Anonymize":
        scanner_config["vault"] = vault

    # Model-backed scanners run through ONNX runtime for faster inference.
    if scanner_name in [
        "Anonymize",
        "BanTopics",
        "Code",
        "Gibberish",
        "Language",
        "PromptInjection",
        "Toxicity",
    ]:
        scanner_config["use_onnx"] = True

    return input_scanners.get_scanner_by_name(scanner_name, scanner_config)
77
+
78
+
79
def _get_output_scanner(
    scanner_name: str,
    scanner_config: Optional[Dict],
    *,
    vault: Vault,
):
    """
    Instantiate a single output scanner by name.

    Args:
        scanner_name: Registered name of the output scanner (e.g. "Deanonymize").
        scanner_config: Scanner parameters from the config file; may be None.
        vault: Shared vault, injected into scanners that need it.

    Returns:
        The instantiated output scanner.
    """
    # Copy so the caller's params dict (held inside the parsed config object)
    # is not mutated with injected keys like "vault"/"use_onnx".
    scanner_config = dict(scanner_config) if scanner_config else {}

    if scanner_name == "Deanonymize":
        scanner_config["vault"] = vault

    # Model-backed scanners run through ONNX runtime for faster inference.
    if scanner_name in [
        "BanTopics",
        "Bias",
        "Code",
        "FactualConsistency",
        "Gibberish",
        "Language",
        "LanguageSame",
        "MaliciousURLs",
        "NoRefusal",
        "Relevance",
        "Sensitive",
        "Toxicity",
    ]:
        scanner_config["use_onnx"] = True

    return output_scanners.get_scanner_by_name(scanner_name, scanner_config)
app/schemas.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Dict
2
+
3
+ from pydantic import BaseModel
4
+
5
+
6
class AnalyzePromptRequest(BaseModel):
    """Request body for POST /analyze/prompt."""

    # Raw user prompt to run through the configured input scanners.
    prompt: str
8
+
9
+
10
class AnalyzePromptResponse(BaseModel):
    """Response body for POST /analyze/prompt."""

    # Prompt after sanitization (e.g. redaction/anonymization) by the scanners.
    sanitized_prompt: str
    # True when every scanner accepted the prompt.
    is_valid: bool
    # Per-scanner risk score, keyed by scanner name.
    scanners: Dict[str, float]
14
+
15
+
16
class AnalyzeOutputRequest(BaseModel):
    """Request body for POST /analyze/output."""

    # Original prompt that produced the model output (needed by context-aware
    # scanners such as relevance/consistency checks).
    prompt: str
    # Model output to run through the configured output scanners.
    output: str
19
+
20
+
21
class AnalyzeOutputResponse(BaseModel):
    """Response body for POST /analyze/output."""

    # Output after sanitization (e.g. redaction/deanonymization) by the scanners.
    sanitized_output: str
    # True when every scanner accepted the output.
    is_valid: bool
    # Per-scanner risk score, keyed by scanner name.
    scanners: Dict[str, float]
app/util.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ import sys
3
+ from os import getpid
4
+ from typing import Dict, Literal
5
+
6
+ import psutil
7
+ import structlog
8
+
9
+ from llm_guard.util import configure_logger as configure_llm_guard_logger
10
+
11
+ LOG_LEVELS = Literal["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"]
12
+ EXTERNAL_LOGGERS = {
13
+ "transformers",
14
+ }
15
+
16
+
17
def configure_logger(log_level: LOG_LEVELS = "INFO"):
    """
    Set up stdlib logging and structlog for the API process.

    Args:
        log_level: The log level to use for the logger. One of
            "DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL".
    """
    logging.basicConfig(
        format="[%(asctime)s - %(name)s - %(levelname)s] %(message)s",
        level=log_level,
        stream=sys.stdout,
    )
    structlog.configure(logger_factory=structlog.stdlib.LoggerFactory())

    # Quieten chatty third-party libraries regardless of our own level.
    for noisy_logger in EXTERNAL_LOGGERS:
        logging.getLogger(noisy_logger).setLevel(logging.WARNING)

    # Keep the llm-guard library's own logger aligned with the API's level.
    configure_llm_guard_logger(log_level)
35
+
36
+
37
def get_resource_utilization() -> Dict:
    """
    Snapshot resource usage of the current process and host.

    Returns:
        A dictionary with the process CPU utilization (percent), the process
        memory utilization (percent), and the host's total physical memory
        (bytes).
    """
    current_process = psutil.Process(getpid())

    return {
        # CPU utilization of this process, as a percentage.
        "cpu_utilization_percent": current_process.cpu_percent(),
        # Memory utilization of this process, as a percentage of total RAM.
        "memory_utilization_percent": current_process.memory_percent(),
        # Total physical memory installed on the host, in bytes.
        "total_memory_available_bytes": psutil.virtual_memory().total,
    }
app/version.py ADDED
@@ -0,0 +1 @@
 
 
1
# Single source of truth for the package version; read by setuptools via
# [tool.setuptools.dynamic] in pyproject.toml.
__version__ = "0.0.6"
config/scanners.yml ADDED
@@ -0,0 +1,162 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ app:
2
+ name: ${APP_NAME:LLM Guard API}
3
+ log_level: ${LOG_LEVEL:INFO}
4
+ scan_fail_fast: ${SCAN_FAIL_FAST:false}
5
+ scan_prompt_timeout: ${SCAN_PROMPT_TIMEOUT:10}
6
+ scan_output_timeout: ${SCAN_OUTPUT_TIMEOUT:30}
7
+ port: ${APP_PORT:7860}
8
+
9
+ cache:
10
+ ttl: ${CACHE_TTL:3600}
11
+ #max_size: ${CACHE_MAX_SIZE:1000}
12
+
13
+ rate_limit:
14
+ enabled: ${RATE_LIMIT_ENABLED:true}
15
+ limit: ${RATE_LIMIT_LIMIT:100/minute}
16
+
17
+ #auth:
18
+ # type: http_bearer
19
+ # token: ${AUTH_TOKEN:}
20
+
21
+ tracing:
22
+ exporter: ${TRACING_EXPORTER:console}
23
+ endpoint: ${TRACING_OTEL_ENDPOINT:} # Example: "<traces-endpoint>/v1/traces"
24
+
25
+ metrics:
26
+ exporter: ${METRICS_TYPE:prometheus}
27
+ endpoint: ${METRICS_ENDPOINT:} # Example: "<metrics-endpoint>/v1/metrics"
28
+
29
+ # Scanners are applied in the order they are listed here.
30
+ input_scanners:
31
+ - type: Anonymize
32
+ params:
33
+ # allowed_names: []
34
+ # hidden_names: []
35
+ # entity_types: []
36
+ # preamble: ""
37
+ use_faker: false
38
+ - type: BanCompetitors
39
+ params:
40
+ competitors: ["facebook"]
41
+ threshold: 0.5
42
+ - type: BanSubstrings
43
+ params:
44
+ substrings: ["test"]
45
+ match_type: "word"
46
+ case_sensitive: false
47
+ redact: false
48
+ contains_all: false
49
+ - type: BanTopics
50
+ params:
51
+ topics: ["violence"]
52
+ threshold: 0.6
53
+ - type: Code
54
+ params:
55
+ languages: ["Python"]
56
+ is_blocked: true
57
+ - type: Gibberish
58
+ params:
59
+ threshold: 0.9
60
+ - type: InvisibleText
61
+ params: {}
62
+ - type: Language
63
+ params:
64
+ valid_languages: ["en"]
65
+ - type: PromptInjection
66
+ params:
67
+ threshold: 0.9
68
+ - type: Regex
69
+ params:
70
+ patterns: ["Bearer [A-Za-z0-9-._~+/]+"]
71
+ is_blocked: true
72
+ match_type: search
73
+ redact: true
74
+ - type: Secrets
75
+ params:
76
+ redact_mode: "all"
77
+ - type: Sentiment
78
+ params:
79
+ # lexicon: "vader_lexicon"
80
+ threshold: -0.1
81
+ - type: TokenLimit
82
+ params:
83
+ limit: 4096
84
+ encoding_name: "cl100k_base"
85
+ - type: Toxicity
86
+ params:
87
+ threshold: 0.5
88
+
89
+ output_scanners:
90
+ - type: BanCompetitors
91
+ params:
92
+ competitors: ["facebook"]
93
+ threshold: 0.5
94
+ - type: BanSubstrings
95
+ params:
96
+ substrings: ["test"]
97
+ match_type: "word"
98
+ case_sensitive: false
99
+ redact: false
100
+ contains_all: false
101
+ - type: BanTopics
102
+ params:
103
+ topics: ["violence"]
104
+ threshold: 0.6
105
+ - type: Bias
106
+ params:
107
+ threshold: 0.75
108
+ - type: Code
109
+ params:
110
+ languages: ["Python"]
111
+ is_blocked: true
112
+ - type: Deanonymize
113
+ params:
114
+ matching_strategy: "exact"
115
+ - type: FactualConsistency
116
+ params:
117
+ minimum_score: 0.5
118
+ - type: Gibberish
119
+ params:
120
+ threshold: 0.9
121
+ - type: JSON
122
+ params:
123
+ required_elements: 0
124
+ repair: true
125
+ - type: Language
126
+ params:
127
+ valid_languages: ["en"]
128
+ - type: LanguageSame
129
+ params: {}
130
+ - type: MaliciousURLs
131
+ params:
132
+ threshold: 0.75
133
+ - type: NoRefusal
134
+ params:
135
+ threshold: 0.5
136
+ - type: ReadingTime
137
+ params:
138
+ max_time: 5
139
+ truncate: false
140
+ - type: Regex
141
+ params:
142
+ patterns: ["Bearer [A-Za-z0-9-._~+/]+"]
143
+ is_blocked: true
144
+ match_type: search
145
+ redact: true
146
+ - type: Relevance
147
+ params:
148
+ threshold: 0.5
149
+ - type: Sensitive
150
+ params:
151
+ # entity_types:
152
+ redact: false
153
+ threshold: 0.0
154
+ - type: Sentiment
155
+ params:
156
+ threshold: -0.1
157
+ # lexicon: "vader_lexicon"
158
+ - type: Toxicity
159
+ params:
160
+ threshold: 0.5
161
+ - type: URLReachability
162
+ params: {}
docker-compose.yml ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ version: '3.8'
2
+
3
+ services:
4
+ llm_guard_api:
5
+ build:
6
+ context: .
7
+ dockerfile: Dockerfile
8
+ ports:
9
+ - "7860:7860"
10
+ volumes:
11
+ - ./config/scanners.yml:/home/user/app/config/scanners.yml
openapi.json ADDED
@@ -0,0 +1,319 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "openapi": "3.1.0",
3
+ "info": {
4
+ "title": "LLM Guard API",
5
+ "description": "API to run LLM Guard scanners.",
6
+ "version": "0.0.6"
7
+ },
8
+ "paths": {
9
+ "/": {
10
+ "get": {
11
+ "tags": [
12
+ "Main"
13
+ ],
14
+ "summary": "Read Root",
15
+ "operationId": "read_root__get",
16
+ "responses": {
17
+ "200": {
18
+ "description": "Successful Response",
19
+ "content": {
20
+ "application/json": {
21
+ "schema": {}
22
+ }
23
+ }
24
+ }
25
+ }
26
+ }
27
+ },
28
+ "/healthz": {
29
+ "get": {
30
+ "tags": [
31
+ "Health"
32
+ ],
33
+ "summary": "Healthcheck",
34
+ "operationId": "healthcheck_healthz_get",
35
+ "responses": {
36
+ "200": {
37
+ "description": "Successful Response",
38
+ "content": {
39
+ "application/json": {
40
+ "schema": {}
41
+ }
42
+ }
43
+ }
44
+ }
45
+ }
46
+ },
47
+ "/readyz": {
48
+ "get": {
49
+ "tags": [
50
+ "Health"
51
+ ],
52
+ "summary": "Liveliness",
53
+ "operationId": "liveliness_readyz_get",
54
+ "responses": {
55
+ "200": {
56
+ "description": "Successful Response",
57
+ "content": {
58
+ "application/json": {
59
+ "schema": {}
60
+ }
61
+ }
62
+ }
63
+ }
64
+ }
65
+ },
66
+ "/analyze/output": {
67
+ "post": {
68
+ "tags": [
69
+ "Analyze"
70
+ ],
71
+ "summary": "Analyze Output",
72
+ "description": "Analyze an output and return the sanitized output and the results of the scanners",
73
+ "operationId": "analyze_output_analyze_output_post",
74
+ "requestBody": {
75
+ "content": {
76
+ "application/json": {
77
+ "schema": {
78
+ "$ref": "#/components/schemas/AnalyzeOutputRequest"
79
+ }
80
+ }
81
+ },
82
+ "required": true
83
+ },
84
+ "responses": {
85
+ "200": {
86
+ "description": "Successful Response",
87
+ "content": {
88
+ "application/json": {
89
+ "schema": {
90
+ "$ref": "#/components/schemas/AnalyzeOutputResponse"
91
+ }
92
+ }
93
+ }
94
+ },
95
+ "422": {
96
+ "description": "Validation Error",
97
+ "content": {
98
+ "application/json": {
99
+ "schema": {
100
+ "$ref": "#/components/schemas/HTTPValidationError"
101
+ }
102
+ }
103
+ }
104
+ }
105
+ },
106
+ "security": [
107
+ {
108
+ "HTTPBearer": []
109
+ }
110
+ ]
111
+ }
112
+ },
113
+ "/analyze/prompt": {
114
+ "post": {
115
+ "tags": [
116
+ "Analyze"
117
+ ],
118
+ "summary": "Analyze Prompt",
119
+ "description": "Analyze a prompt and return the sanitized prompt and the results of the scanners",
120
+ "operationId": "analyze_prompt_analyze_prompt_post",
121
+ "requestBody": {
122
+ "content": {
123
+ "application/json": {
124
+ "schema": {
125
+ "$ref": "#/components/schemas/AnalyzePromptRequest"
126
+ }
127
+ }
128
+ },
129
+ "required": true
130
+ },
131
+ "responses": {
132
+ "200": {
133
+ "description": "Successful Response",
134
+ "content": {
135
+ "application/json": {
136
+ "schema": {
137
+ "$ref": "#/components/schemas/AnalyzePromptResponse"
138
+ }
139
+ }
140
+ }
141
+ },
142
+ "422": {
143
+ "description": "Validation Error",
144
+ "content": {
145
+ "application/json": {
146
+ "schema": {
147
+ "$ref": "#/components/schemas/HTTPValidationError"
148
+ }
149
+ }
150
+ }
151
+ }
152
+ },
153
+ "security": [
154
+ {
155
+ "HTTPBearer": []
156
+ }
157
+ ]
158
+ }
159
+ },
160
+ "/metrics": {
161
+ "get": {
162
+ "tags": [
163
+ "Metrics"
164
+ ],
165
+ "summary": "Metrics",
166
+ "operationId": "metrics_metrics_get",
167
+ "responses": {
168
+ "200": {
169
+ "description": "Successful Response",
170
+ "content": {
171
+ "application/json": {
172
+ "schema": {}
173
+ }
174
+ }
175
+ }
176
+ }
177
+ }
178
+ }
179
+ },
180
+ "components": {
181
+ "schemas": {
182
+ "AnalyzeOutputRequest": {
183
+ "properties": {
184
+ "prompt": {
185
+ "type": "string",
186
+ "title": "Prompt"
187
+ },
188
+ "output": {
189
+ "type": "string",
190
+ "title": "Output"
191
+ }
192
+ },
193
+ "type": "object",
194
+ "required": [
195
+ "prompt",
196
+ "output"
197
+ ],
198
+ "title": "AnalyzeOutputRequest"
199
+ },
200
+ "AnalyzeOutputResponse": {
201
+ "properties": {
202
+ "sanitized_output": {
203
+ "type": "string",
204
+ "title": "Sanitized Output"
205
+ },
206
+ "is_valid": {
207
+ "type": "boolean",
208
+ "title": "Is Valid"
209
+ },
210
+ "scanners": {
211
+ "additionalProperties": {
212
+ "type": "number"
213
+ },
214
+ "type": "object",
215
+ "title": "Scanners"
216
+ }
217
+ },
218
+ "type": "object",
219
+ "required": [
220
+ "sanitized_output",
221
+ "is_valid",
222
+ "scanners"
223
+ ],
224
+ "title": "AnalyzeOutputResponse"
225
+ },
226
+ "AnalyzePromptRequest": {
227
+ "properties": {
228
+ "prompt": {
229
+ "type": "string",
230
+ "title": "Prompt"
231
+ }
232
+ },
233
+ "type": "object",
234
+ "required": [
235
+ "prompt"
236
+ ],
237
+ "title": "AnalyzePromptRequest"
238
+ },
239
+ "AnalyzePromptResponse": {
240
+ "properties": {
241
+ "sanitized_prompt": {
242
+ "type": "string",
243
+ "title": "Sanitized Prompt"
244
+ },
245
+ "is_valid": {
246
+ "type": "boolean",
247
+ "title": "Is Valid"
248
+ },
249
+ "scanners": {
250
+ "additionalProperties": {
251
+ "type": "number"
252
+ },
253
+ "type": "object",
254
+ "title": "Scanners"
255
+ }
256
+ },
257
+ "type": "object",
258
+ "required": [
259
+ "sanitized_prompt",
260
+ "is_valid",
261
+ "scanners"
262
+ ],
263
+ "title": "AnalyzePromptResponse"
264
+ },
265
+ "HTTPValidationError": {
266
+ "properties": {
267
+ "detail": {
268
+ "items": {
269
+ "$ref": "#/components/schemas/ValidationError"
270
+ },
271
+ "type": "array",
272
+ "title": "Detail"
273
+ }
274
+ },
275
+ "type": "object",
276
+ "title": "HTTPValidationError"
277
+ },
278
+ "ValidationError": {
279
+ "properties": {
280
+ "loc": {
281
+ "items": {
282
+ "anyOf": [
283
+ {
284
+ "type": "string"
285
+ },
286
+ {
287
+ "type": "integer"
288
+ }
289
+ ]
290
+ },
291
+ "type": "array",
292
+ "title": "Location"
293
+ },
294
+ "msg": {
295
+ "type": "string",
296
+ "title": "Message"
297
+ },
298
+ "type": {
299
+ "type": "string",
300
+ "title": "Error Type"
301
+ }
302
+ },
303
+ "type": "object",
304
+ "required": [
305
+ "loc",
306
+ "msg",
307
+ "type"
308
+ ],
309
+ "title": "ValidationError"
310
+ }
311
+ },
312
+ "securitySchemes": {
313
+ "HTTPBearer": {
314
+ "type": "http",
315
+ "scheme": "bearer"
316
+ }
317
+ }
318
+ }
319
+ }
pyproject.toml ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [project]
2
+ name = "llm-guard-api"
3
+ description = "LLM Guard API is a deployment of LLM Guard as an API."
4
+ authors = [
5
+ { name = "Protect AI", email = "[email protected]"}
6
+ ]
7
+ readme = "README.md"
8
+ dynamic = ["version"]
9
+ classifiers = [
10
+ "Development Status :: 4 - Beta",
11
+ "Intended Audience :: Developers",
12
+ "License :: OSI Approved :: MIT License",
13
+ "Programming Language :: Python :: 3",
14
+ "Programming Language :: Python :: 3.9",
15
+ "Programming Language :: Python :: 3.10",
16
+ "Programming Language :: Python :: 3.11",
17
+ ]
18
+ requires-python = ">=3.9"
19
+
20
+ dependencies = [
21
+ "asyncio==3.4.3", # NOTE(review): PyPI "asyncio" is an obsolete Python 3.3 backport; asyncio is stdlib for requires-python >= 3.9 — consider removing this pin
22
+ "fastapi==0.110.0",
23
+ "llm-guard==0.3.10",
24
+ "pydantic==1.10.14",
25
+ "pyyaml==6.0.1",
26
+ "uvicorn[standard]==0.29.0",
27
+ "structlog>=24",
28
+ "slowapi==0.1.9",
29
+ "opentelemetry-instrumentation-fastapi==0.44b0",
30
+ "opentelemetry-api==1.23.0",
31
+ "opentelemetry-sdk==1.23.0",
32
+ "opentelemetry-exporter-otlp-proto-http==1.23.0",
33
+ "opentelemetry-exporter-prometheus==0.44b0",
34
+ "opentelemetry-sdk-extension-aws==2.0.1",
35
+ "opentelemetry-propagator-aws-xray==1.0.1"
36
+ ]
37
+
38
+ [project.optional-dependencies]
39
+ cpu = [
40
+ "llm-guard[onnxruntime]==0.3.10",
41
+ ]
42
+ gpu = [
43
+ "llm-guard[onnxruntime-gpu]==0.3.10",
44
+ ]
45
+
46
+ [tool.setuptools]
47
+ packages = ["app"]
48
+
49
+ [tool.setuptools.dynamic]
50
+ version = {attr = "app.version.__version__"}
51
+
52
+ [build-system]
53
+ requires = ["setuptools", "wheel"]
54
+ build-backend = "setuptools.build_meta"
55
+
56
+ [project.scripts]
57
+ llm_guard_api = "app.app:run_app"