Spaces:
Running
Running
<!doctype html>
<!-- The doctype keeps browsers in standards mode; without it the page is laid out in quirks mode. -->
<html lang="en">
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>LLM Benchmark overview</title>
| <style> | |
| body { | |
| font-family: Arial, sans-serif; | |
| background-color: #fdf6fb; | |
| color: #333; | |
| margin: 0; | |
| padding: 20px; | |
| } | |
| h1 { | |
| text-align: center; | |
| color: #d16ba5; | |
| } | |
| .table-container { | |
| overflow-x: auto; | |
| margin-top: 20px; | |
| position: relative; | |
| } | |
| table { | |
| width: 100%; | |
| border-collapse: collapse; | |
| margin: 0 auto; | |
| background-color: #fff; | |
| box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1); | |
| table-layout: fixed; | |
| } | |
| th, td { | |
| padding: 10px; | |
| text-align: left; | |
| border: 1px solid #ddd; | |
| overflow: hidden; | |
| text-overflow: ellipsis; | |
| white-space: nowrap; | |
| position: relative; | |
| } | |
| th { | |
| background-color: #f7d9eb; | |
| color: #333; | |
| font-weight: bold; | |
| } | |
| th.resizable { | |
| position: relative; | |
| } | |
| th.resizable .resizer { | |
| position: absolute; | |
| top: 0; | |
| right: 0; | |
| width: 5px; | |
| height: 100%; | |
| cursor: col-resize; | |
| background-color: transparent; | |
| } | |
| td.expandable { | |
| cursor: pointer; | |
| } | |
| td:nth-child(2) { | |
| background-color: #fcebf7; | |
| } | |
| .filter { | |
| margin-bottom: 20px; | |
| text-align: center; | |
| } | |
| .filter label { | |
| font-size: 16px; | |
| margin-right: 10px; | |
| color: #d16ba5; | |
| } | |
| .filter select { | |
| padding: 5px; | |
| font-size: 16px; | |
| border: 1px solid #ccc; | |
| border-radius: 5px; | |
| } | |
| .expanded { | |
| white-space: normal; | |
| background-color: #fcebf7; | |
| } | |
| .modal { | |
| position: fixed; | |
| top: 50%; | |
| left: 50%; | |
| transform: translate(-50%, -50%); | |
| background-color: #fff; | |
| box-shadow: 0 4px 8px rgba(0, 0, 0, 0.2); | |
| padding: 20px; | |
| z-index: 1000; | |
| border-radius: 10px; | |
| max-width: 80%; | |
| max-height: 80%; | |
| overflow: auto; | |
| } | |
| .overlay { | |
| position: fixed; | |
| top: 0; | |
| left: 0; | |
| width: 100%; | |
| height: 100%; | |
| background: rgba(0, 0, 0, 0.5); | |
| z-index: 999; | |
| white-space: pre-wrap; | |
| } | |
| </style> | |
| </head> | |
| <body> | |
<main>
  <h1 id="pageTitle">LLM Benchmark overview</h1>
  <p>As the development and evaluation of large language models (LLMs) continue to evolve, I conducted an overview of the principal benchmarks commonly found in research papers. My goal is to create a clear and comprehensive resource that summarizes what is being tested in LLMs, with concrete examples, key metrics, and direct links to related papers and repositories. This document serves as a centralized matrix that will be continuously updated with insights from future papers I review.</p>
  <div class="filter">
    <label for="metricFilter">Filter by Evaluated task:</label>
    <select id="metricFilter">
      <option value="">All</option>
    </select>
  </div>
  <div class="table-container">
    <!-- Named by the page heading so assistive technology can announce the table. -->
    <table id="csvTable" aria-labelledby="pageTitle">
      <thead>
        <!-- Headers will be dynamically added -->
      </thead>
      <tbody>
        <!-- Rows will be dynamically added here -->
      </tbody>
    </table>
  </div>
</main>
<!-- Backdrop and modal are toggled by the script through style.display. -->
<div class="overlay" id="overlay" style="display: none;"></div>
<div class="modal" id="modal" style="display: none;"></div>
| <script> | |
/**
 * Parse CSV text into a header row and data rows.
 *
 * Handles quoted fields (commas and line breaks inside quotes are kept),
 * doubled quotes ("") as an escaped quote character, and LF, CRLF or lone CR
 * line endings. Every field is trimmed. Lines that contain no data at all are
 * skipped, and a trailing empty field on the last line is preserved.
 *
 * @param {string} content Raw CSV text; null/undefined is treated as empty.
 * @returns {{headers: string[], rows: string[][]}} The first record as
 *   `headers` (an empty array when there is no data) and the rest as `rows`.
 */
function parseCSV(content) {
  const text = content == null ? '' : String(content);
  const rows = [];
  let currentRow = [];
  let currentField = '';
  let insideQuotes = false;

  // Close the field being accumulated and append it to the current record.
  const endField = () => {
    currentRow.push(currentField.trim());
    currentField = '';
  };

  // Close the current record; a record made of one empty field is a blank line.
  const endRow = () => {
    endField();
    if (currentRow.length > 1 || currentRow[0] !== '') {
      rows.push(currentRow);
    }
    currentRow = [];
  };

  for (let i = 0; i < text.length; i++) {
    const char = text[i];
    if (char === '"') {
      if (insideQuotes && text[i + 1] === '"') {
        // Doubled quote inside a quoted field is a literal quote character.
        currentField += '"';
        i++;
      } else {
        insideQuotes = !insideQuotes;
      }
    } else if (char === ',' && !insideQuotes) {
      endField();
    } else if ((char === '\n' || char === '\r') && !insideQuotes) {
      // Consume CRLF as a single line break.
      if (char === '\r' && text[i + 1] === '\n') i++;
      endRow();
    } else {
      currentField += char;
    }
  }

  // Flush the last record when the text does not end with a line break.
  // Checking the row length keeps a trailing empty field ("a,b,").
  if (currentField !== '' || currentRow.length > 0) {
    endRow();
  }

  const headers = rows.shift() || [];
  return { headers, rows };
}
/**
 * Download a CSV file from a Hugging Face dataset repository and parse it.
 *
 * @param {string} dataset Dataset id in "owner/name" form.
 * @param {string} filename Path of the file inside the repository (main branch).
 * @param {string} [token] Optional access token. The Authorization header is
 *   only sent when a token is provided, so a missing token no longer turns
 *   into a bogus "Bearer undefined" credential.
 * @returns {Promise<{headers: string[], rows: string[][]}>} Result of parseCSV.
 * @throws {Error} When the HTTP response status is not OK.
 */
async function loadCSVFromHuggingFace(dataset, filename, token) {
  // Encode each path segment separately so the "/" separators survive while
  // spaces and other reserved characters are escaped.
  const encodePath = (path) => String(path).split('/').map(encodeURIComponent).join('/');
  const url = `https://huggingface.co/datasets/${encodePath(dataset)}/resolve/main/${encodePath(filename)}`;

  const headers = {};
  if (token) {
    headers['Authorization'] = `Bearer ${token}`;
  }

  const response = await fetch(url, { headers });
  if (!response.ok) {
    // statusText can be empty, so always include the numeric status.
    throw new Error(`Failed to fetch file: ${response.status} ${response.statusText}`.trim());
  }
  const content = await response.text();
  return parseCSV(content);
}
// Static page elements the script reads from and writes to.
const metricFilter = document.querySelector('#metricFilter');
const table = document.querySelector('#csvTable');
// The table's header and body sections are refilled on every render.
const [tableHead, tableBody] = ['thead', 'tbody'].map(section => table.querySelector(section));
// Backdrop and dialog used to show a cell's full value.
const overlay = document.querySelector('#overlay');
const modal = document.querySelector('#modal');
/**
 * Add a drag handle to every header cell so its column can be resized with
 * the mouse.
 *
 * Header cells that already contain a handle are skipped, so calling this
 * more than once on the same headers does not stack handles. The width is
 * clamped to a minimum so a drag to the left cannot produce a negative
 * (and therefore ignored) CSS width.
 */
function makeResizable() {
  const MIN_COLUMN_WIDTH = 30; // px

  document.querySelectorAll('th').forEach(th => {
    if (th.querySelector('.resizer')) return;

    const resizer = document.createElement('div');
    resizer.classList.add('resizer');
    th.appendChild(resizer);

    let startX = 0;
    let startWidth = 0;

    const resizeColumn = (e) => {
      const newWidth = Math.max(MIN_COLUMN_WIDTH, startWidth + (e.pageX - startX));
      th.style.width = `${newWidth}px`;
    };

    // Listeners live on the document so the drag keeps working when the
    // pointer leaves the narrow handle; they are removed on mouse release.
    const stopResize = () => {
      document.removeEventListener('mousemove', resizeColumn);
      document.removeEventListener('mouseup', stopResize);
    };

    resizer.addEventListener('mousedown', (e) => {
      // Prevent the drag from starting a text selection.
      e.preventDefault();
      startX = e.pageX;
      startWidth = th.offsetWidth;
      document.addEventListener('mousemove', resizeColumn);
      document.addEventListener('mouseup', stopResize);
    });
  });
}
/**
 * Append one <option> per distinct value found in a column to the filter
 * dropdown, in order of first appearance.
 *
 * Empty or missing cells are skipped: an option with an empty value would be
 * indistinguishable from the built-in "All" entry. Values that already have
 * an option are skipped too, so calling this again does not create duplicates.
 *
 * @param {string[][]} data Parsed CSV rows.
 * @param {number} headerIndex Index of the column to collect values from.
 */
function populateFilterOptions(data, headerIndex) {
  const alreadyListed = new Set(Array.from(metricFilter.options, option => option.value));
  const fragment = document.createDocumentFragment();

  data.forEach(row => {
    const type = row[headerIndex];
    if (!type || alreadyListed.has(type)) return;
    alreadyListed.add(type);

    const option = document.createElement('option');
    option.value = type;
    option.textContent = type;
    fragment.appendChild(option);
  });

  // Single DOM insertion for all new options.
  metricFilter.appendChild(fragment);
}
/**
 * Rebuild the table header and body from parsed CSV data.
 *
 * Rows are optionally filtered on one column, sorted by their first column,
 * and rendered one cell per value. Cells in the "Paper" and "HF or Git link"
 * columns become links; every cell opens the modal with its full value when
 * clicked. Column widths set on the previous header cells are carried over to
 * the new ones so a re-render (e.g. a filter change) keeps the user's sizing.
 *
 * @param {string[]} headers Column names.
 * @param {string[][]} rows Data rows.
 * @param {string} filterValue Value a row must have in the filter column;
 *   an empty value disables filtering.
 * @param {number} headerIndex Index of the column the filter applies to.
 */
function populateTable(headers, rows, filterValue, headerIndex) {
  // Columns rendered as links, mapped to the fixed link text.
  const linkLabels = new Map([
    ['Paper', 'paper link'],
    ['HF or Git link', 'dataset link'],
  ]);

  // CSV content is external data: only allow http(s) targets so a value such
  // as "javascript:..." can never become a clickable link.
  const isSafeUrl = (value) => {
    try {
      const protocol = new URL(value, document.baseURI).protocol;
      return protocol === 'http:' || protocol === 'https:';
    } catch (err) {
      return false;
    }
  };

  // Remember widths of the header cells that are about to be discarded.
  const previousWidths = Array.from(tableHead.querySelectorAll('th'), th => th.style.width);

  tableHead.innerHTML = '';
  tableBody.innerHTML = '';

  const headerRow = document.createElement('tr');
  headers.forEach((header, index) => {
    const th = document.createElement('th');
    th.textContent = header;
    th.scope = 'col';
    th.classList.add('resizable');
    if (previousWidths[index]) {
      th.style.width = previousWidths[index];
    }
    headerRow.appendChild(th);
  });
  tableHead.appendChild(headerRow);

  const bodyFragment = document.createDocumentFragment();
  rows
    .filter(row => !filterValue || row[headerIndex] === filterValue)
    // `|| ''` keeps the sort from throwing on a row without a first cell.
    .sort((a, b) => (a[0] || '').localeCompare(b[0] || ''))
    .forEach(row => {
      const tr = document.createElement('tr');
      row.forEach((value, index) => {
        const td = document.createElement('td');
        const linkText = linkLabels.get(headers[index]);
        if (linkText && value && isSafeUrl(value)) {
          const link = document.createElement('a');
          link.href = value;
          link.textContent = linkText;
          link.target = '_blank';
          // Do not hand the opened page a reference to this window.
          link.rel = 'noopener noreferrer';
          td.appendChild(link);
        } else {
          td.textContent = value;
        }
        td.classList.add('expandable');
        td.title = 'Click to expand';
        td.addEventListener('click', (event) => {
          // Following a link should not also pop up the modal.
          if (event.target.closest('a')) return;
          overlay.style.display = 'block';
          modal.style.display = 'block';
          modal.textContent = value;
          modal.style.whiteSpace = 'pre-wrap';
        });
        tr.appendChild(td);
      });
      bodyFragment.appendChild(tr);
    });
  tableBody.appendChild(bodyFragment);

  makeResizable();
}
// Parsed CSV kept for re-rendering on filter changes; undefined until loaded.
let parsedCSV;

// Hide the detail modal together with its backdrop.
const hideModal = () => {
  overlay.style.display = 'none';
  modal.style.display = 'none';
};
overlay.addEventListener('click', hideModal);
// Keyboard users need a way to dismiss the modal too.
document.addEventListener('keydown', (event) => {
  if (event.key === 'Escape') hideModal();
});

metricFilter.addEventListener('change', () => {
  // The dropdown is usable before the download finishes; ignore it until then.
  if (!parsedCSV) return;
  populateTable(parsedCSV.headers, parsedCSV.rows, metricFilter.value, 0);
});

// Read the token defensively: `window.huggingface` may be absent (for example
// when the page is opened outside the hosting environment).
// NOTE(review): anything readable here is visible to every visitor of the
// page, so this token should be treated as public - confirm that is intended.
const hfVariables = (window.huggingface && window.huggingface.variables) || {};

loadCSVFromHuggingFace('UlrickBL/benchmark_overview', 'benchmark_overview.csv', hfVariables.HF_TOKEN)
  .then(({ headers, rows }) => {
    parsedCSV = { headers, rows };
    populateFilterOptions(rows, 0);
    populateTable(headers, rows, '', 0);
  })
  .catch((error) => {
    // Surface the failure instead of leaving an empty table and an
    // unhandled promise rejection.
    console.error(error);
    tableBody.innerHTML = '';
    const tr = document.createElement('tr');
    const td = document.createElement('td');
    td.textContent = 'Could not load the benchmark data. Please try again later.';
    tr.appendChild(td);
    tableBody.appendChild(tr);
  });
| </script> | |
| </body> | |
| </html> |