AMontiB committed · Commit 734982a · Parent(s): 7ea5247

upload
Files changed:
- .gitattributes +1 -0
- .gitignore +11 -0
- README.md +67 -8
- attribution_demonstrator/__init__.py +0 -0
- attribution_demonstrator/assets/Logo_ID.png +3 -0
- attribution_demonstrator/assets/Logo_MUR.png +3 -0
- attribution_demonstrator/assets/Logo_NGEU.png +3 -0
- attribution_demonstrator/assets/Logo_Serics.png +3 -0
- attribution_demonstrator/connector/__init__.py +0 -0
- attribution_demonstrator/connector/abstract_connector.py +24 -0
- attribution_demonstrator/connector/azure_model_connector.py +180 -0
- attribution_demonstrator/main.py +177 -0
- pyproject.toml +20 -0
- requirements.txt +5 -0
- uv.lock +0 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+*.png filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,11 @@
+roles/
+venv*
+.idea/
+
+*.log
+*.pyc
+__pycache__
+
+*.sqlite3
+
+.gradio/
README.md CHANGED
@@ -1,13 +1,72 @@
 ---
-title:
-emoji:
-colorFrom:
-colorTo:
+title: FF4ALL WILD DEMONSTRATOR
+emoji: 🏞️
+colorFrom: purple
+colorTo: red
 sdk: gradio
-sdk_version: 5.
-app_file:
+sdk_version: 5.33.0
+app_file: attribution_demonstrator/main.py
 pinned: false
-license: cc-by-4.0
 ---
 
-
+# Demonstrator
+
+## Run the demonstrator
+
+Export the required environment variables:
+
+```bash
+export AZURE_CONNECTION_STRING="...."
+export CONTAINER_NAME="..."
+export DATABRICKS_HOST="..."
+export DATABRICKS_CLIENT_ID="..."
+export DATABRICKS_SECRET="..."
+export MODELS="..."
+```
+
+## Using pip
+
+1. Install the required packages:
+```bash
+pip install -r requirements.txt
+```
+2. Run the demonstrator with the command:
+```bash
+export PYTHONPATH=${PWD}/.
+python -m attribution_demonstrator.main
+```
+
+## Using uv
+
+1. Install uv; please refer to the [uv documentation](https://docs.astral.sh/uv/)
+2. Run the demonstrator with the command:
+
+```bash
+export PYTHONPATH=${PWD}/.
+uv run -m attribution_demonstrator.main
+```
+
+## Exporting the uv environment to a requirements.txt
+
+```bash
+uv export --no-emit-workspace --no-dev --no-annotate --no-header --no-hashes --output-file requirements.txt
+```
+
+# Environment variables
+
+## AZURE_CONNECTION_STRING
+The connection string for the Azure Blob Storage account where the model files are stored.
+
+## CONTAINER_NAME
+The name of the Azure Blob Storage container used for temporary image storage.
+
+## DATABRICKS_HOST
+The base URL of the Databricks workspace hosting the model serving endpoints.
+
+## DATABRICKS_CLIENT_ID and DATABRICKS_SECRET
+The client ID and secret for the Databricks workspace where the models are hosted. These are used to authenticate and access the models.
+The token can be generated in the Databricks workspace under `Workspace settings > Identity and access > Service principals`.
+
+## MODELS
+A JSON string containing the model names and their corresponding URLs. The format is:
+```json
+{"model_name_1": "url", "model_name_2": "url"}
+```
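For clarity, here is a minimal sketch (assuming Python 3.11+ and the environment variable names documented above) of how a consumer such as `attribution_demonstrator/main.py` is expected to parse `MODELS` at startup:

```python
import json
import os

# MODELS must hold a JSON object mapping model names to serving-endpoint URLs,
# e.g. MODELS='{"model_name_1": "https://...", "model_name_2": "https://..."}'
raw = os.getenv("MODELS")
if raw is None:
    raise RuntimeError("MODELS environment variable is not set")

models = json.loads(raw)  # raises json.JSONDecodeError on malformed input
if not isinstance(models, dict):
    raise ValueError("MODELS must be a JSON object, not a list or scalar")
print(f"Loaded {len(models)} model endpoint(s): {', '.join(models)}")
```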
attribution_demonstrator/__init__.py ADDED
File without changes

attribution_demonstrator/assets/Logo_ID.png ADDED
Git LFS Details

attribution_demonstrator/assets/Logo_MUR.png ADDED
Git LFS Details

attribution_demonstrator/assets/Logo_NGEU.png ADDED
Git LFS Details

attribution_demonstrator/assets/Logo_Serics.png ADDED
Git LFS Details

attribution_demonstrator/connector/__init__.py ADDED
File without changes
attribution_demonstrator/connector/abstract_connector.py ADDED
@@ -0,0 +1,24 @@
+from abc import ABC, abstractmethod
+from typing import List
+
+from PIL import Image
+
+
+class ModelConnector(ABC):
+    """
+    Abstract base class for model connectors.
+    """
+
+    @abstractmethod
+    async def perform_inference(self, image: Image.Image, model_list: List[str]) -> dict:
+        """
+        Perform inference on the given image.
+
+        Args:
+            image (Image.Image): The input image.
+            model_list (List[str]): List of model identifiers to use for inference.
+
+        Returns:
+            dict: Mapping from model identifier to its prediction result.
+        """
+        raise NotImplementedError("Subclasses must implement this method.")
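As an illustration of the contract this base class defines, here is a minimal sketch of a concrete subclass (the `ConstantConnector` name and its fixed scores are hypothetical, for local testing only):

```python
import asyncio
from typing import List

from PIL import Image

from attribution_demonstrator.connector.abstract_connector import ModelConnector


class ConstantConnector(ModelConnector):
    """Toy connector that returns a fixed score per requested model."""

    async def perform_inference(self, image: Image.Image, model_list: List[str]) -> dict:
        # Pretend every model assigns the same probability to one generator.
        return {model_id: {"stable_diffusion": 0.5} for model_id in model_list}


if __name__ == "__main__":
    img = Image.new("RGB", (64, 64))  # blank test image
    print(asyncio.run(ConstantConnector().perform_inference(img, ["demo_model"])))
```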
attribution_demonstrator/connector/azure_model_connector.py ADDED
@@ -0,0 +1,180 @@
+import asyncio
+import logging
+import uuid
+from datetime import datetime
+from io import BytesIO
+from typing import Dict, Tuple, Union, List
+
+import httpx
+import pandas as pd
+from PIL import Image
+from azure.storage.blob import BlobServiceClient
+
+from attribution_demonstrator.connector.abstract_connector import ModelConnector
+
+logger = logging.getLogger("orchestrator.common.manager.image")
+
+
+class AzureDatabricksModelConnector(ModelConnector):
+    """
+    Azure model connector for models served on Databricks.
+    This class is responsible for performing inference on images using a model hosted on Azure.
+    """
+
+    def __init__(self,
+                 models: Dict[str, str],
+                 databricks_host: str,
+                 databricks_client_id: str,
+                 databricks_secret: str,
+                 azure_connection_string: str,
+                 azure_container_name: str
+                 ):
+        """
+        Initialize the connector with the model endpoints and the Azure/Databricks credentials.
+        """
+        super().__init__()
+
+        self.models = models
+        self.azure_connection_string = azure_connection_string
+        self.azure_container_name = azure_container_name
+
+        self._databricks_secret = databricks_secret
+        self._databricks_client_id = databricks_client_id
+        self._databricks_host = databricks_host
+
+        self._token = None
+        self._token_expiration = None
+
+    def upload_image_to_azure(self, image: Image.Image) -> str:
+        """
+        Upload the image to Azure Blob Storage and return the blob name.
+
+        Args:
+            image (Image.Image): The input image.
+
+        Returns:
+            str: The name of the uploaded blob in Azure Blob Storage.
+        """
+        uploaded_file_name = f"ff4all_demonstrator_{uuid.uuid4()}.tiff"
+        logger.info(f"Uploading [{uploaded_file_name}] image to Azure Blob Storage")
+        blob_service_client = BlobServiceClient.from_connection_string(self.azure_connection_string)
+        blob_client = blob_service_client.get_blob_client(container=self.azure_container_name, blob=uploaded_file_name)
+
+        tiff_bytes = BytesIO()
+        image.save(tiff_bytes, format="TIFF", compression=None)
+        tiff_bytes.seek(0)
+
+        blob_client.upload_blob(tiff_bytes, overwrite=True)
+        logger.info("Image uploaded successfully")
+        return uploaded_file_name
+
+    def _delete_blob(self, blob_name: str):
+        """
+        Delete the blob from Azure Blob Storage.
+
+        Args:
+            blob_name (str): The name of the blob to delete.
+        """
+        logger.info(f"Deleting blob {blob_name} from Azure Blob Storage")
+        blob_service_client = BlobServiceClient.from_connection_string(self.azure_connection_string)
+        blob_client = blob_service_client.get_blob_client(container=self.azure_container_name, blob=blob_name)
+
+        blob_client.delete_blob()
+        logger.info("Blob deleted successfully")
+
+    def _get_databricks_token(self) -> str:
+        """
+        Get a Databricks OAuth token for authentication, reusing a cached token while it is still valid.
+
+        Returns:
+            str: The Databricks token.
+        """
+        if self._token and self._token_expiration and self._token_expiration > datetime.now():
+            logger.info(f"Using cached Databricks token, valid until {self._token_expiration.isoformat()}")
+            return self._token
+        else:
+            logger.info("Fetching new Databricks token")
+            client = httpx.Client()
+            response = client.post(
+                f"{self._databricks_host}/oidc/v1/token",
+                auth=(self._databricks_client_id, self._databricks_secret),
+                data={'grant_type': 'client_credentials', 'scope': 'all-apis'}
+            )
+
+            if response.status_code in [200, 201, 202]:
+                token_data = response.json()
+                self._token = token_data['access_token']
+                self._token_expiration = datetime.now() + pd.to_timedelta(token_data['expires_in'], unit='s')
+                return self._token
+            else:
+                raise Exception(f"Failed to get Databricks token: {response.text}")
+
+    async def invoke_model(self, client: httpx.AsyncClient, model_id: str, model_url: str, payload: dict, auth_token: str) -> Tuple[str, Union[list, str]]:
+        logger.info(f"Invoking model {model_id}")
+        try:
+            response = await client.post(
+                model_url,
+                json=payload,
+                headers={'Authorization': f'Bearer {auth_token}',
+                         'Content-Type': 'application/json'},
+                timeout=600  # set a timeout of 10 minutes
+            )
+        except Exception as exc:
+            # Return the error as the result so invoke_models can still build its dict.
+            result = f"Error: {str(exc)}"
+        else:
+            if response.status_code == 200:
+                result = response.json()["predictions"][0]["prediction"]  # only one image is processed at a time
+            else:
+                result = f"HTTP {response.status_code}"
+
+        logger.info(f"Model {model_id} invocation completed")
+
+        return model_id, result
+
+    async def invoke_models(self, payload: dict, model_list: List[str]) -> dict:
+        model_to_invoke = {model_id: self.models[model_id] for model_id in model_list if model_id in self.models}
+        logger.info(f"Models to invoke: {model_to_invoke.keys()}")
+
+        token = self._get_databricks_token()
+        async with httpx.AsyncClient() as client:
+            tasks = [
+                self.invoke_model(
+                    client=client,
+                    model_id=model_id,
+                    model_url=model_url,
+                    payload=payload,
+                    auth_token=token
+                ) for model_id, model_url in model_to_invoke.items()
+            ]
+            results = await asyncio.gather(*tasks, return_exceptions=True)
+
+        logger.info("Invoked all models")
+
+        return dict(results)
+
+    async def perform_inference(self, image: Image.Image, model_list: List[str]) -> dict:
+        """
+        Perform inference on the given image using the models served on Databricks.
+
+        Args:
+            image (Image.Image): The input image.
+            model_list (List[str]): List of model identifiers to use for inference.
+
+        Returns:
+            dict: Mapping from model identifier to its prediction result.
+        """
+        uploaded_file_name = self.upload_image_to_azure(image)
+
+        df = pd.DataFrame([{
+            "file_path": uploaded_file_name
+        }])
+        payload = {"dataframe_split": df.to_dict(orient="split")}
+
+        logger.info(f"Sending payload to the models [{payload}]")
+        response = await self.invoke_models(payload, model_list)
+
+        self._delete_blob(uploaded_file_name)
+
+        return response
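A minimal sketch of driving the connector outside Gradio, assuming the environment variables from the README (the local file name `suspect.png` is a placeholder):

```python
import asyncio
import json
import os

from PIL import Image

from attribution_demonstrator.connector.azure_model_connector import AzureDatabricksModelConnector

connector = AzureDatabricksModelConnector(
    models=json.loads(os.environ["MODELS"]),
    databricks_host=os.environ["DATABRICKS_HOST"],
    databricks_client_id=os.environ["DATABRICKS_CLIENT_ID"],
    databricks_secret=os.environ["DATABRICKS_SECRET"],
    azure_connection_string=os.environ["AZURE_CONNECTION_STRING"],
    azure_container_name=os.environ["CONTAINER_NAME"],
)

# Upload a test image, fan out to every configured endpoint, then clean up the blob.
image = Image.open("suspect.png")  # hypothetical local file
scores = asyncio.run(connector.perform_inference(image, list(connector.models)))
print(scores)  # {"model_name_1": {...}, "model_name_2": {...}}
```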
attribution_demonstrator/main.py ADDED
@@ -0,0 +1,177 @@
+import asyncio
+import base64
+import json
+import logging.config
+import os
+from pathlib import Path
+from typing import List, Any, Tuple
+
+import gradio as gr
+import matplotlib.pyplot as plt
+import pandas as pd
+import plotly.express as px
+from PIL import Image
+
+from attribution_demonstrator.connector.azure_model_connector import AzureDatabricksModelConnector
+
+ASSETS_DIR = Path(__file__).parent / "assets"  # path to the logo images
+
+logging.basicConfig(level=logging.INFO)
+
+logger = logging.getLogger(__name__)
+
+# ---------------------------------------------------------------------------
+# Configuration
+# ---------------------------------------------------------------------------
+models = json.loads(os.getenv("MODELS"))
+
+logger.info(f"Models loaded from environment: {models.keys()}")
+
+connector = AzureDatabricksModelConnector(
+    models=models,
+    databricks_host=os.getenv("DATABRICKS_HOST"),
+    databricks_client_id=os.getenv("DATABRICKS_CLIENT_ID"),
+    databricks_secret=os.getenv("DATABRICKS_SECRET"),
+    azure_connection_string=os.getenv("AZURE_CONNECTION_STRING"),
+    azure_container_name=os.getenv("CONTAINER_NAME")
+)
+
+
+# ---------------------------------------------------------------------------
+# Inference helpers
+# ---------------------------------------------------------------------------
+async def call_tagging_services(image: Image.Image, model_list: List[str]) -> dict:
+    result = await connector.perform_inference(image, model_list)
+    return result
+
+
+def process_image(image: Image.Image, model_list: List[str]) -> Tuple[Any, dict]:
+    if not model_list:
+        raise gr.Error("Please select at least one model to perform inference.")
+    result = asyncio.run(call_tagging_services(image, model_list))
+
+    df_splits = []
+
+    for model, res in result.items():
+        if isinstance(res, dict):
+            for generator, score in res.items():
+                df_splits.append({
+                    "model": model,
+                    "generator": generator,
+                    "score": score
+                })
+        else:
+            logger.info(f"Skipping model with non-dict result: {res}")
+
+    if df_splits:
+        df = pd.DataFrame(df_splits)
+        fig = px.histogram(df, x="generator", y="score",
+                           color='model', barmode='group')
+        fig.update_layout(xaxis_tickangle=45)
+    else:
+        # return an empty plot if no valid results
+        fig, ax = plt.subplots()
+
+    return fig, result
+
+
+def load_image_as_base64(image_path: str) -> str:
+    """
+    Load an image from the given path and return it as a base64 string.
+    """
+    with open(image_path, "rb") as f:
+        return base64.b64encode(f.read()).decode('utf-8')
+
+
+# ---------------------------------------------------------------------------
+# Gradio UI
+# ---------------------------------------------------------------------------
+
+demo = gr.Interface(
+    fn=process_image,
+    inputs=[
+        gr.Image(type="pil", label="Upload an Image"),
+        gr.Dropdown(
+            choices=list(models.keys()),
+            label="Select Model/s",
+            multiselect=True,  # removed deprecated type="value"
+        ),
+    ],
+    outputs=[
+        gr.Plot(label="Model Scores Barplot"),
+        gr.JSON(label="Attribution Results"),
+    ],
+    title="Detection of Deep Fake Media and Life-Long Media Authentication (FF4ALL)",
+    description=(
+        "The **FF4ALL** project (Detection of Deep-Fake Media and Life-Long Media Authentication) is part of the "
+        "extended partnership **SERICS – Security and Rights in the CyberSpace**, funded by Italy’s National "
+        "Recovery and Resilience Plan with *Next Generation EU* resources and coordinated by the University of "
+        "Cagliari. FF4ALL’s mission is to design open methodologies, tools and public datasets that make it easier "
+        "to identify manipulated or AI-generated images, video and audio, and to preserve their authenticity "
+        "throughout the entire life-cycle of a file. The research blends computer vision, machine learning, "
+        "cryptography and blockchain to create a unified framework for deep-fake detection, source attribution and "
+        "tamper-proof traceability. The resulting technologies are intended to support journalists, law-enforcement "
+        "agencies, social-media platforms and ordinary citizens in safeguarding the reliability of digital "
+        "information. The project also champions an open-source ethos: it publishes code, data and evaluation "
+        "protocols under permissive licences, encourages the adoption of open standards, and trains a new "
+        "generation of specialists in forensic media analysis. Through close collaboration with industrial partners "
+        "and public institutions, FF4ALL aims to build a resilient national ecosystem capable of shielding society "
+        "from the threats posed by disinformation. The demo you are about to use is a public proof-of-concept that "
+        "exposes the classifiers developed so far, allowing real-time testing and community feedback.\n\n"
+        "### How this demo works\n\n"
+        "Upload a JPG or PNG image that you suspect was generated by a diffusion or GAN-based model, then tick one "
+        "or more of the **four classifiers** provided by FF4ALL partners:\n\n"
+        "• **DE-FAKE**: a hybrid classifier that leverages multimodal features. "
+        "It utilizes both the image and its corresponding text prompt, processing them through CLIP's respective encoders. "
+        "For images lacking a prompt, it employs the BLIP-2 model to generate an estimated description, ensuring its dual-input mechanism can always be used.\n\n"
+        "• **CLIP+MLP**: uses the CLIP Large model to extract image features without considering any text prompt. "
+        "These features are then classified by a Multi-Layer Perceptron (MLP) with two hidden layers, trained with the Adam optimizer.\n\n"
+        "• **EfficientNetB4**: operates on a patch-level basis, analyzing 50 randomly extracted 96x96 pixel patches from an image. "
+        "The final attribution score is the average of the scores from these patches, a method designed to enhance robustness against common manipulations "
+        "like compression by focusing on synthetic artifacts over semantic content.\n\n"
+        "• **Vision Transformer Classifier (VTC)**: employs the ViT-Base model of [1], pre-trained on ImageNet and fine-tuned on the WILD dataset. "
+        "This model processes images by dividing them into 16x16 pixel patches, which are then fed into the transformer's encoder. "
+        "The final classification score is derived by aggregating predictions from each patch, a strategy intended to improve resilience to localized image distortions.\n\n"
+        "Each network is trained to recognise the visual fingerprints left by popular generators (e.g. Stable "
+        "Diffusion, Midjourney, DALL-E 2). Once processed, the app returns a probability distribution that shows "
+        "how likely it is that the image was produced by each of the supported generators.\n\n"
+        "**Note:** the detectors are *not* intended for authentic photographs — if you upload a real photo the "
+        "scores will be uninformative.\n\n"
+        "Reference: WILD: a new in-the-Wild Image Linkage Dataset for synthetic image attribution, "
+        "Pietro Bongini, Sara Mandelli, Andrea Montibeller, Mirko Casu, Orazio Pontorno, Claudio Vittorio Ragaglia, "
+        "Luca Zanchetta, Mattia Aquilina, Taiba Majid Wani, Luca Guarnera, Benedetta Tondi, Giulia Boato, Paolo Bestagini, "
+        "Irene Amerini, Francesco De Natale, Sebastiano Battiato, Mauro Barni. "
+        "Link to paper: https://arxiv.org/abs/2504.19595 \n\n"
+        "[1] A. Dosovitskiy, L. Beyer, A. Kolesnikov, D. Weissenborn, X. Zhai, "
+        "T. Unterthiner, M. Dehghani, M. Minderer, G. Heigold, S. Gelly et al., "
+        "“An image is worth 16x16 words: Transformers for image recognition "
+        "at scale,” arXiv preprint arXiv:2010.11929, 2020.\n\n"
+    ),
+    article=f"""
+    <br><br>
+    <div style="display:flex;justify-content:center;align-items:center;gap:2rem;flex-wrap:wrap;">
+      <!-- SERICS logo -->
+      <img src="data:image/png;base64,{load_image_as_base64(f'{ASSETS_DIR}/Logo_Serics.png')}" alt="SERICS logo" style="height:60px;" />
+      <!-- Three-logo banner: NGEU, MUR, Italia Domani -->
+      <div style="display:flex;gap:1rem;">
+        <img src="data:image/png;base64,{load_image_as_base64(f'{ASSETS_DIR}/Logo_NGEU.png')}" alt="Next Generation EU logo" style="height:60px;" />
+        <img src="data:image/png;base64,{load_image_as_base64(f'{ASSETS_DIR}/Logo_MUR.png')}" alt="MUR logo" style="height:60px;" />
+        <img src="data:image/png;base64,{load_image_as_base64(f'{ASSETS_DIR}/Logo_ID.png')}" alt="Italia Domani logo" style="height:60px;" />
+      </div>
+    </div>
+    """,
+    allow_flagging="never",
+)
+
+# ---------------------------------------------------------------------------
+# Entrypoint
+# ---------------------------------------------------------------------------
+if __name__ == "__main__":
+    gr.set_static_paths([str(ASSETS_DIR)])  # ensure assets are served correctly
+    demo.launch(allowed_paths=[str(ASSETS_DIR)])
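To make the plotting step concrete, here is a small self-contained sketch (model and generator names are made up) of the `{model: {generator: score}}` structure that `process_image` flattens into a long-form DataFrame for `px.histogram`:

```python
import pandas as pd
import plotly.express as px

# Hypothetical connector output: one score per (model, generator) pair.
result = {
    "CLIP+MLP": {"stable_diffusion": 0.71, "midjourney": 0.19, "dalle2": 0.10},
    "EfficientNetB4": {"stable_diffusion": 0.55, "midjourney": 0.30, "dalle2": 0.15},
}

rows = [
    {"model": model, "generator": generator, "score": score}
    for model, scores in result.items()
    for generator, score in scores.items()
]
df = pd.DataFrame(rows)

fig = px.histogram(df, x="generator", y="score", color="model", barmode="group")
fig.update_layout(xaxis_tickangle=45)
fig.show()  # grouped bars: one group per generator, one bar per model
```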
pyproject.toml ADDED
@@ -0,0 +1,20 @@
+[project]
+name = "attribution-demonstrator"
+version = "0.1.0"
+description = "Gradio demonstrator for FF4ALL synthetic image attribution"
+readme = "README.md"
+requires-python = ">=3.11"
+dependencies = [
+    "azure-storage-blob>=12.25.1",
+    "gradio>=5.33.0,<6",
+    "matplotlib>=3.10.3",
+    "plotly>=6.1.2",
+    "seaborn>=0.13.2",
+]
+
+[dependency-groups]
+dev = [
+    "flake8>=7.2.0",
+    "pre-commit>=4.2.0",
+    "ruff>=0.11.13",
+]
requirements.txt ADDED
@@ -0,0 +1,5 @@
+azure-storage-blob>=12.25.1
+gradio>=5.33.0,<6
+seaborn~=0.13.2
+matplotlib>=3.10.3
+plotly~=6.1.2
uv.lock ADDED
The diff for this file is too large to render. See raw diff.