llamaindex
/

vdr-2b-multi-v1

+{
+    "bomFormat": "CycloneDX",
+    "specVersion": "1.6",
+    "serialNumber": "urn:uuid:00ca3e26-e155-40c6-801d-90be620fd041",
+    "version": 1,
+    "metadata": {
+        "timestamp": "2025-07-14T11:11:18.744037+00:00",
+        "component": {
+            "type": "machine-learning-model",
+            "bom-ref": "llamaindex/vdr-2b-multi-v1-9a12af53-e3c0-557a-86ff-8b132947222a",
+            "name": "llamaindex/vdr-2b-multi-v1",
+            "externalReferences": [
+                {
+                    "url": "https://huggingface.co/llamaindex/vdr-2b-multi-v1",
+                    "type": "documentation"
+                }
+            ],
+            "modelCard": {
+                "modelParameters": {
+                    "task": "image-to-text",
+                    "architectureFamily": "qwen2_vl",
+                    "modelArchitecture": "Qwen2VLForConditionalGeneration",
+                    "datasets": [
+                        {
+                            "ref": "llamaindex/vdr-multilingual-train-a46edb90-89ea-5b4e-a4aa-7bbe0726c573"
+                        }
+                    ]
+                },
+                "properties": [
+                    {
+                        "name": "library_name",
+                        "value": "sentence-transformers"
+                    },
+                    {
+                        "name": "base_model",
+                        "value": "MrLight/dse-qwen2-2b-mrl-v1"
+                    }
+                ]
+            },
+            "authors": [
+                {
+                    "name": "llamaindex"
+                }
+            ],
+            "licenses": [
+                {
+                    "license": {
+                        "id": "Apache-2.0",
+                        "url": "https://spdx.org/licenses/Apache-2.0.html"
+                    }
+                }
+            ],
+            "tags": [
+                "sentence-transformers",
+                "safetensors",
+                "qwen2_vl",
+                "image-to-text",
+                "transformers",
+                "Qwen2-VL",
+                "en",
+                "it",
+                "fr",
+                "de",
+                "es",
+                "dataset:llamaindex/vdr-multilingual-train",
+                "arxiv:2406.11251",
+                "base_model:MrLight/dse-qwen2-2b-mrl-v1",
+                "base_model:finetune:MrLight/dse-qwen2-2b-mrl-v1",
+                "license:apache-2.0",
+                "text-generation-inference",
+                "endpoints_compatible",
+                "region:us"
+            ]
+        }
+    },
+    "components": [
+        {
+            "type": "data",
+            "bom-ref": "llamaindex/vdr-multilingual-train-a46edb90-89ea-5b4e-a4aa-7bbe0726c573",
+            "name": "llamaindex/vdr-multilingual-train",
+            "data": [
+                {
+                    "type": "dataset",
+                    "bom-ref": "llamaindex/vdr-multilingual-train-a46edb90-89ea-5b4e-a4aa-7bbe0726c573",
+                    "name": "llamaindex/vdr-multilingual-train",
+                    "contents": {
+                        "url": "https://huggingface.co/datasets/llamaindex/vdr-multilingual-train",
+                        "properties": [
+                            {
+                                "name": "language",
+                                "value": "de, it, fr, es, en"
+                            },
+                            {
+                                "name": "size_categories",
+                                "value": "100K<n<1M"
+                            },
+                            {
+                                "name": "pretty_name",
+                                "value": "Multilingual Visual Document Retrieval"
+                            },
+                            {
+                                "name": "configs",
+                                "value": "Name of the dataset subset: en {\"split\": \"train\", \"path\": \"en/train-*\"}"
+                            },
+                            {
+                                "name": "configs",
+                                "value": "Name of the dataset subset: it {\"split\": \"train\", \"path\": \"it/train-*\"}"
+                            },
+                            {
+                                "name": "configs",
+                                "value": "Name of the dataset subset: fr {\"split\": \"train\", \"path\": \"fr/train-*\"}"
+                            },
+                            {
+                                "name": "configs",
+                                "value": "Name of the dataset subset: es {\"split\": \"train\", \"path\": \"es/train-*\"}"
+                            },
+                            {
+                                "name": "configs",
+                                "value": "Name of the dataset subset: de {\"split\": \"train\", \"path\": \"de/train-*\"}"
+                            },
+                            {
+                                "name": "license",
+                                "value": "apache-2.0"
+                            }
+                        ]
+                    },
+                    "governance": {
+                        "owners": [
+                            {
+                                "organization": {
+                                    "name": "llamaindex",
+                                    "url": "https://huggingface.co/llamaindex"
+                                }
+                            }
+                        ]
+                    },
+                    "description": "\n\t\n\t\t\n\t\tMultilingual Visual Document Retrieval Dataset\n\t\n\n\n\nThis dataset consists of 500k multilingual query image samples, collected and generated from scratch using public internet pdfs. The queries are synthetic and generated using VLMs (gemini-1.5-pro and Qwen2-VL-72B).\n\nIt was used to train the vdr-2b-multi-v1 retrieval multimodal, multilingual embedding model.\n\n\t\n\t\t\n\t\n\t\n\t\tHow it was created\n\t\n\nThis is the entire data pipeline used to create the Italian subset of this dataset. Each step\u2026 See the full description on the dataset page: https://huggingface.co/datasets/llamaindex/vdr-multilingual-train."
+                }
+            ]
+        }
+    ]
+}