Nora Petrova
commited on
Commit
·
20e666e
1
Parent(s):
99c7281
Add project to new space
Browse files- Dockerfile +19 -0
- README.md +8 -6
- leaderboard-app/.gitignore +41 -0
- leaderboard-app/README.md +113 -0
- leaderboard-app/app/favicon.ico +0 -0
- leaderboard-app/app/globals.css +29 -0
- leaderboard-app/app/layout.js +19 -0
- leaderboard-app/app/page.js +84 -0
- leaderboard-app/components/About.jsx +741 -0
- leaderboard-app/components/DemographicAnalysis.jsx +925 -0
- leaderboard-app/components/LLMComparisonDashboard.jsx +639 -0
- leaderboard-app/components/MetricsBreakdown.jsx +447 -0
- leaderboard-app/components/TaskPerformance.jsx +756 -0
- leaderboard-app/components/Tooltip.jsx +145 -0
- leaderboard-app/eslint.config.mjs +14 -0
- leaderboard-app/jsconfig.json +7 -0
- leaderboard-app/lib/utils.js +708 -0
- leaderboard-app/next.config.mjs +4 -0
- leaderboard-app/package-lock.json +0 -0
- leaderboard-app/package.json +25 -0
- leaderboard-app/postcss.config.mjs +5 -0
- leaderboard-app/public/file.svg +1 -0
- leaderboard-app/public/globe.svg +1 -0
- leaderboard-app/public/leaderboard_data.json +0 -0
- leaderboard-app/public/next.svg +1 -0
- leaderboard-app/public/vercel.svg +1 -0
- leaderboard-app/public/window.svg +1 -0
Dockerfile
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Image for the Next.js leaderboard app, deployed as a Hugging Face Docker Space.
FROM node:20.11.0-slim

WORKDIR /app

# Copy the application code.
# The official node image ships a non-root `node` account (uid 1000) and has no
# account named `user`, so ownership must reference `node` for this step to resolve.
COPY --chown=node leaderboard-app/ ./

# Install dependencies
RUN npm install

# Build the app
RUN npm run build

# Expose the port the app will run on
# HF Spaces uses port 7860 by default
EXPOSE 7860

# Start the app with the correct port
ENV PORT=7860
CMD ["npm", "start"]
|
README.md
CHANGED
|
@@ -1,11 +1,13 @@
|
|
| 1 |
---
|
| 2 |
-
title:
|
| 3 |
-
emoji:
|
| 4 |
-
colorFrom:
|
| 5 |
-
colorTo:
|
| 6 |
sdk: docker
|
| 7 |
-
pinned:
|
| 8 |
-
short_description:
|
|
|
|
|
|
|
| 9 |
---
|
| 10 |
|
| 11 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
|
| 1 |
---
|
| 2 |
+
title: UX Leaderboard
|
| 3 |
+
emoji: 🥇
|
| 4 |
+
colorFrom: blue
|
| 5 |
+
colorTo: cyan
|
| 6 |
sdk: docker
|
| 7 |
+
pinned: true
|
| 8 |
+
short_description: Leaderboard of LLMs based on detailed human feedback
|
| 9 |
+
tags:
|
| 10 |
+
- leaderboard
|
| 11 |
---
|
| 12 |
|
| 13 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
leaderboard-app/.gitignore
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# See https://help.github.com/articles/ignoring-files/ for more about ignoring files.
|
| 2 |
+
|
| 3 |
+
# dependencies
|
| 4 |
+
/node_modules
|
| 5 |
+
/.pnp
|
| 6 |
+
.pnp.*
|
| 7 |
+
.yarn/*
|
| 8 |
+
!.yarn/patches
|
| 9 |
+
!.yarn/plugins
|
| 10 |
+
!.yarn/releases
|
| 11 |
+
!.yarn/versions
|
| 12 |
+
|
| 13 |
+
# testing
|
| 14 |
+
/coverage
|
| 15 |
+
|
| 16 |
+
# next.js
|
| 17 |
+
/.next/
|
| 18 |
+
/out/
|
| 19 |
+
|
| 20 |
+
# production
|
| 21 |
+
/build
|
| 22 |
+
|
| 23 |
+
# misc
|
| 24 |
+
.DS_Store
|
| 25 |
+
*.pem
|
| 26 |
+
|
| 27 |
+
# debug
|
| 28 |
+
npm-debug.log*
|
| 29 |
+
yarn-debug.log*
|
| 30 |
+
yarn-error.log*
|
| 31 |
+
.pnpm-debug.log*
|
| 32 |
+
|
| 33 |
+
# env files (can opt-in for committing if needed)
|
| 34 |
+
.env*
|
| 35 |
+
|
| 36 |
+
# vercel
|
| 37 |
+
.vercel
|
| 38 |
+
|
| 39 |
+
# typescript
|
| 40 |
+
*.tsbuildinfo
|
| 41 |
+
next-env.d.ts
|
leaderboard-app/README.md
ADDED
|
@@ -0,0 +1,113 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# LLM Comparison Leaderboard
|
| 2 |
+
|
| 3 |
+
An interactive dashboard for comparing the performance of state-of-the-art large language models across various tasks and metrics.
|
| 4 |
+
|
| 5 |
+
## Features
|
| 6 |
+
|
| 7 |
+
- Overall model rankings with comprehensive scoring
|
| 8 |
+
- Task-specific performance analysis
|
| 9 |
+
- Metric breakdowns across different dimensions
|
| 10 |
+
- User satisfaction and experience metrics
|
| 11 |
+
- Interactive visualizations using Recharts
|
| 12 |
+
- Responsive design for all device sizes
|
| 13 |
+
|
| 14 |
+
## Getting Started
|
| 15 |
+
|
| 16 |
+
### Prerequisites
|
| 17 |
+
|
| 18 |
+
- Node.js 16.8 or later
|
| 19 |
+
- Python 3.8 or later (for data processing)
|
| 20 |
+
- Python packages: pandas, numpy
|
| 21 |
+
|
| 22 |
+
### Installation
|
| 23 |
+
|
| 24 |
+
1. Clone the repository:
|
| 25 |
+
|
| 26 |
+
```bash
|
| 27 |
+
git clone https://github.com/yourusername/llm-comparison-leaderboard.git
|
| 28 |
+
cd llm-comparison-leaderboard
|
| 29 |
+
```
|
| 30 |
+
|
| 31 |
+
2. Install dependencies:
|
| 32 |
+
|
| 33 |
+
```bash
|
| 34 |
+
npm install
|
| 35 |
+
```
|
| 36 |
+
|
| 37 |
+
3. Install Python dependencies (if you plan to process data):
|
| 38 |
+
|
| 39 |
+
```bash
|
| 40 |
+
pip install pandas numpy
|
| 41 |
+
```
|
| 42 |
+
|
| 43 |
+
### Using Sample Data
|
| 44 |
+
|
| 45 |
+
The repository includes a sample JSON file with placeholder data in `public/leaderboard_data.json`. You can start the development server right away to see the dashboard with this data:
|
| 46 |
+
|
| 47 |
+
```bash
|
| 48 |
+
npm run dev
|
| 49 |
+
```
|
| 50 |
+
|
| 51 |
+
Visit [http://localhost:3000](http://localhost:3000) to see the dashboard.
|
| 52 |
+
|
| 53 |
+
### Processing Your Own Data
|
| 54 |
+
|
| 55 |
+
If you have your own data, follow these steps:
|
| 56 |
+
|
| 57 |
+
1. Place your CSV data file in the `data` directory:
|
| 58 |
+
|
| 59 |
+
```bash
|
| 60 |
+
mkdir -p data
|
| 61 |
+
cp /path/to/your/pilot_data_n20.csv data/
|
| 62 |
+
```
|
| 63 |
+
|
| 64 |
+
2. Run the data processing script:
|
| 65 |
+
|
| 66 |
+
```bash
|
| 67 |
+
npm run process-data
|
| 68 |
+
```
|
| 69 |
+
|
| 70 |
+
This will:
|
| 71 |
+
- Process the CSV data using the Python script
|
| 72 |
+
- Generate a JSON file in the `public` directory
|
| 73 |
+
- Format the data for the dashboard
|
| 74 |
+
|
| 75 |
+
3. Start the development server:
|
| 76 |
+
|
| 77 |
+
```bash
|
| 78 |
+
npm run dev
|
| 79 |
+
```
|
| 80 |
+
|
| 81 |
+
## Project Structure
|
| 82 |
+
|
| 83 |
+
- `app/` - Next.js App Router components
|
| 84 |
+
- `page.js` - Main page component that loads data and renders dashboard
|
| 85 |
+
- `layout.js` - Layout component with metadata and global styles
|
| 86 |
+
- `globals.css` - Global styles including Tailwind CSS
|
| 87 |
+
- `components/` - React components
|
| 88 |
+
- `LLMComparisonDashboard.jsx` - The main dashboard component
|
| 89 |
+
- `public/` - Static files
|
| 90 |
+
- `leaderboard_data.json` - Processed data for the dashboard
|
| 91 |
+
- `lib/` - Utility functions
|
| 92 |
+
- `utils.js` - Helper functions for data processing
|
| 93 |
+
- `scripts/` - Data processing scripts
|
| 94 |
+
- `process_data.js` - Node.js script for running Python processor
|
| 95 |
+
- `process_data.py` - Python script for data processing
|
| 96 |
+
|
| 97 |
+
## Building for Production
|
| 98 |
+
|
| 99 |
+
To build the application for production:
|
| 100 |
+
|
| 101 |
+
```bash
|
| 102 |
+
npm run build
|
| 103 |
+
```
|
| 104 |
+
|
| 105 |
+
To start the production server:
|
| 106 |
+
|
| 107 |
+
```bash
|
| 108 |
+
npm run start
|
| 109 |
+
```
|
| 110 |
+
|
| 111 |
+
## License
|
| 112 |
+
|
| 113 |
+
This project is licensed under the MIT License - see the LICENSE file for details.
|
leaderboard-app/app/favicon.ico
ADDED
|
|
leaderboard-app/app/globals.css
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
@import "tailwindcss";
|
| 2 |
+
|
| 3 |
+
:root {
|
| 4 |
+
--background: #ffffff;
|
| 5 |
+
--foreground: #171717;
|
| 6 |
+
}
|
| 7 |
+
|
| 8 |
+
@theme inline {
|
| 9 |
+
--color-background: var(--background);
|
| 10 |
+
--color-foreground: var(--foreground);
|
| 11 |
+
--font-sans: var(--font-geist-sans);
|
| 12 |
+
--font-mono: var(--font-geist-mono);
|
| 13 |
+
}
|
| 14 |
+
|
| 15 |
+
/* Force light theme regardless of color scheme preference */
|
| 16 |
+
/* Disable dark mode
|
| 17 |
+
@media (prefers-color-scheme: dark) {
|
| 18 |
+
:root {
|
| 19 |
+
--background: #0a0a0a;
|
| 20 |
+
--foreground: #ededed;
|
| 21 |
+
}
|
| 22 |
+
}
|
| 23 |
+
*/
|
| 24 |
+
|
| 25 |
+
body {
|
| 26 |
+
background: var(--background);
|
| 27 |
+
color: var(--foreground);
|
| 28 |
+
font-family: Arial, Helvetica, sans-serif;
|
| 29 |
+
}
|
leaderboard-app/app/layout.js
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import { Inter } from 'next/font/google';
|
| 2 |
+
import './globals.css';
|
| 3 |
+
|
| 4 |
+
const inter = Inter({ subsets: ['latin'] });
|
| 5 |
+
|
| 6 |
+
export const metadata = {
|
| 7 |
+
title: 'LLM Comparison Leaderboard',
|
| 8 |
+
description: 'Interactive leaderboard comparing performance of state-of-the-art large language models across various tasks and metrics.',
|
| 9 |
+
};
|
| 10 |
+
|
| 11 |
+
export default function RootLayout({ children }) {
|
| 12 |
+
return (
|
| 13 |
+
<html lang="en">
|
| 14 |
+
<body className={`${inter.className} bg-gray-50`}>
|
| 15 |
+
{children}
|
| 16 |
+
</body>
|
| 17 |
+
</html>
|
| 18 |
+
);
|
| 19 |
+
}
|
leaderboard-app/app/page.js
ADDED
|
@@ -0,0 +1,84 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
'use client';
|
| 2 |
+
|
| 3 |
+
import { useState, useEffect } from 'react';
|
| 4 |
+
import dynamic from 'next/dynamic';
|
| 5 |
+
import { prepareDataForVisualization } from '../lib/utils';
|
| 6 |
+
|
| 7 |
+
// Dynamically import the dashboard component with SSR disabled
|
| 8 |
+
// This is important because recharts needs to be rendered on the client side
|
| 9 |
+
const LLMComparisonDashboard = dynamic(
|
| 10 |
+
() => import('../components/LLMComparisonDashboard'),
|
| 11 |
+
{ ssr: false }
|
| 12 |
+
);
|
| 13 |
+
|
| 14 |
+
export default function Home() {
|
| 15 |
+
const [data, setData] = useState(null);
|
| 16 |
+
const [loading, setLoading] = useState(true);
|
| 17 |
+
const [error, setError] = useState(null);
|
| 18 |
+
|
| 19 |
+
useEffect(() => {
|
| 20 |
+
async function fetchData() {
|
| 21 |
+
try {
|
| 22 |
+
setLoading(true);
|
| 23 |
+
|
| 24 |
+
// Fetch the data from the JSON file in the public directory
|
| 25 |
+
const response = await fetch('/leaderboard_data.json');
|
| 26 |
+
|
| 27 |
+
if (!response.ok) {
|
| 28 |
+
throw new Error(`Failed to fetch data: ${response.status} ${response.statusText}`);
|
| 29 |
+
}
|
| 30 |
+
|
| 31 |
+
const jsonData = await response.json();
|
| 32 |
+
|
| 33 |
+
// Process the data for visualization
|
| 34 |
+
const processedData = prepareDataForVisualization(jsonData);
|
| 35 |
+
|
| 36 |
+
setData(processedData);
|
| 37 |
+
setLoading(false);
|
| 38 |
+
} catch (err) {
|
| 39 |
+
console.error('Error loading data:', err);
|
| 40 |
+
setError(err.message || 'Failed to load data');
|
| 41 |
+
setLoading(false);
|
| 42 |
+
}
|
| 43 |
+
}
|
| 44 |
+
|
| 45 |
+
fetchData();
|
| 46 |
+
}, []);
|
| 47 |
+
|
| 48 |
+
if (loading) {
|
| 49 |
+
return (
|
| 50 |
+
<div className="flex items-center justify-center min-h-screen">
|
| 51 |
+
<div className="text-center">
|
| 52 |
+
<div className="animate-spin rounded-full h-12 w-12 border-b-2 border-blue-500 mx-auto mb-4"></div>
|
| 53 |
+
<p className="text-lg text-gray-600">Loading LLM comparison data...</p>
|
| 54 |
+
</div>
|
| 55 |
+
</div>
|
| 56 |
+
);
|
| 57 |
+
}
|
| 58 |
+
|
| 59 |
+
if (error) {
|
| 60 |
+
return (
|
| 61 |
+
<div className="flex items-center justify-center min-h-screen">
|
| 62 |
+
<div className="text-center max-w-md p-6 bg-red-50 rounded-lg border border-red-200">
|
| 63 |
+
<svg xmlns="http://www.w3.org/2000/svg" className="h-12 w-12 text-red-500 mx-auto mb-4" fill="none" viewBox="0 0 24 24" stroke="currentColor">
|
| 64 |
+
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M12 8v4m0 4h.01M21 12a9 9 0 11-18 0 9 9 0 0118 0z" />
|
| 65 |
+
</svg>
|
| 66 |
+
<h2 className="text-xl font-bold text-red-700 mb-2">Error Loading Data</h2>
|
| 67 |
+
<p className="text-gray-600">{error}</p>
|
| 68 |
+
<button
|
| 69 |
+
onClick={() => window.location.reload()}
|
| 70 |
+
className="mt-4 px-4 py-2 bg-blue-500 text-white rounded hover:bg-blue-600 transition-colors"
|
| 71 |
+
>
|
| 72 |
+
Try Again
|
| 73 |
+
</button>
|
| 74 |
+
</div>
|
| 75 |
+
</div>
|
| 76 |
+
);
|
| 77 |
+
}
|
| 78 |
+
|
| 79 |
+
return (
|
| 80 |
+
<main className="min-h-screen p-4">
|
| 81 |
+
{data && <LLMComparisonDashboard data={data} />}
|
| 82 |
+
</main>
|
| 83 |
+
);
|
| 84 |
+
}
|
leaderboard-app/components/About.jsx
ADDED
|
@@ -0,0 +1,741 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"use client";
|
| 2 |
+
|
| 3 |
+
import React, { useState } from "react";
|
| 4 |
+
import {
|
| 5 |
+
ChevronDown,
|
| 6 |
+
ChevronUp,
|
| 7 |
+
Info,
|
| 8 |
+
Book,
|
| 9 |
+
Calculator,
|
| 10 |
+
BarChart,
|
| 11 |
+
UserCheck,
|
| 12 |
+
CheckCircle,
|
| 13 |
+
MessageCircle,
|
| 14 |
+
Brain,
|
| 15 |
+
SlidersHorizontal,
|
| 16 |
+
Shield,
|
| 17 |
+
Smile,
|
| 18 |
+
Globe,
|
| 19 |
+
} from "lucide-react";
|
| 20 |
+
|
| 21 |
+
const AboutTab = () => {
|
| 22 |
+
// Task list for easier management
|
| 23 |
+
const tasksUsed = [
|
| 24 |
+
"Following Up on Job Application: Drafting a professional follow-up email",
|
| 25 |
+
"Planning Weekly Meals: Creating a meal plan accommodating dietary restrictions",
|
| 26 |
+
"Creating Travel Itinerary: Planning a European city break",
|
| 27 |
+
"Understanding Complex Topic: Learning about day trading concepts",
|
| 28 |
+
"Generating Creative Ideas: Brainstorming unique birthday gift ideas",
|
| 29 |
+
"Making Decisions Between Options: Comparing tech products for purchase",
|
| 30 |
+
];
|
| 31 |
+
|
| 32 |
+
// State for collapsible sections
|
| 33 |
+
const [openSections, setOpenSections] = useState({
|
| 34 |
+
introduction: true,
|
| 35 |
+
methodology: true,
|
| 36 |
+
metricsCalculation: true,
|
| 37 |
+
metricsExplained: true,
|
| 38 |
+
});
|
| 39 |
+
|
| 40 |
+
// State for active metric tab
|
| 41 |
+
const [activeMetricTab, setActiveMetricTab] = useState("helpfulness");
|
| 42 |
+
|
| 43 |
+
// Toggle section visibility
|
| 44 |
+
const toggleSection = (section) => {
|
| 45 |
+
setOpenSections({
|
| 46 |
+
...openSections,
|
| 47 |
+
[section]: !openSections[section],
|
| 48 |
+
});
|
| 49 |
+
};
|
| 50 |
+
|
| 51 |
+
// Metrics data
|
| 52 |
+
const metricsData = [
|
| 53 |
+
{
|
| 54 |
+
id: "helpfulness",
|
| 55 |
+
title: "Helpfulness",
|
| 56 |
+
icon: <CheckCircle size={18} />,
|
| 57 |
+
color: "bg-green-500",
|
| 58 |
+
description:
|
| 59 |
+
"Evaluates how well the model provides useful, practical assistance that addresses the user's needs and helps them accomplish their goals.",
|
| 60 |
+
metrics: [
|
| 61 |
+
{
|
| 62 |
+
name: "Effectiveness",
|
| 63 |
+
description:
|
| 64 |
+
"How effectively did the model help you accomplish your specific goal?",
|
| 65 |
+
},
|
| 66 |
+
{
|
| 67 |
+
name: "Comprehensiveness",
|
| 68 |
+
description:
|
| 69 |
+
"How comprehensive was the model's response in addressing all aspects of your request?",
|
| 70 |
+
},
|
| 71 |
+
{
|
| 72 |
+
name: "Usefulness",
|
| 73 |
+
description:
|
| 74 |
+
"How useful were the model's suggestions or solutions for your needs?",
|
| 75 |
+
},
|
| 76 |
+
],
|
| 77 |
+
},
|
| 78 |
+
{
|
| 79 |
+
id: "communication",
|
| 80 |
+
title: "Communication",
|
| 81 |
+
icon: <MessageCircle size={18} />,
|
| 82 |
+
color: "bg-blue-500",
|
| 83 |
+
description:
|
| 84 |
+
"Assesses the clarity, coherence, and appropriateness of the model's writing style, including tone and language choices.",
|
| 85 |
+
metrics: [
|
| 86 |
+
{
|
| 87 |
+
name: "Tone and Language Style",
|
| 88 |
+
description:
|
| 89 |
+
"How well did the model match its tone and language style to the context of your interaction?",
|
| 90 |
+
},
|
| 91 |
+
{
|
| 92 |
+
name: "Conversation Flow",
|
| 93 |
+
description:
|
| 94 |
+
"How natural and conversational were the model's responses?",
|
| 95 |
+
},
|
| 96 |
+
{
|
| 97 |
+
name: "Detail and Technical Language",
|
| 98 |
+
description:
|
| 99 |
+
"How appropriate was the level of detail and technical language for your needs?",
|
| 100 |
+
},
|
| 101 |
+
],
|
| 102 |
+
},
|
| 103 |
+
{
|
| 104 |
+
id: "understanding",
|
| 105 |
+
title: "Understanding",
|
| 106 |
+
icon: <Brain size={18} />,
|
| 107 |
+
color: "bg-purple-500",
|
| 108 |
+
description:
|
| 109 |
+
"Measures how well the model comprehends the user's requests, including implicit needs and contextual information.",
|
| 110 |
+
metrics: [
|
| 111 |
+
{
|
| 112 |
+
name: "Accuracy",
|
| 113 |
+
description:
|
| 114 |
+
"How accurately did the model interpret your initial request?",
|
| 115 |
+
},
|
| 116 |
+
{
|
| 117 |
+
name: "Context Memory",
|
| 118 |
+
description:
|
| 119 |
+
"How well did the model maintain context throughout the conversation?",
|
| 120 |
+
},
|
| 121 |
+
{
|
| 122 |
+
name: "Intuitiveness",
|
| 123 |
+
description:
|
| 124 |
+
"How well did the model pick up on implicit aspects of your request without requiring explicit explanation?",
|
| 125 |
+
},
|
| 126 |
+
],
|
| 127 |
+
},
|
| 128 |
+
{
|
| 129 |
+
id: "adaptiveness",
|
| 130 |
+
title: "Adaptiveness",
|
| 131 |
+
icon: <SlidersHorizontal size={18} />,
|
| 132 |
+
color: "bg-amber-500",
|
| 133 |
+
description:
|
| 134 |
+
"Measures how well the model adjusts to different user needs, contexts, and feedback throughout a conversation.",
|
| 135 |
+
metrics: [
|
| 136 |
+
{
|
| 137 |
+
name: "Flexibility",
|
| 138 |
+
description:
|
| 139 |
+
"How effectively did the model adjust its responses based on your feedback?",
|
| 140 |
+
},
|
| 141 |
+
{
|
| 142 |
+
name: "Clarity",
|
| 143 |
+
description:
|
| 144 |
+
"How well did the model clarify ambiguities or misunderstandings?",
|
| 145 |
+
},
|
| 146 |
+
{
|
| 147 |
+
name: "Conversation Building",
|
| 148 |
+
description:
|
| 149 |
+
"How well did the model build upon previous exchanges in the conversation?",
|
| 150 |
+
},
|
| 151 |
+
],
|
| 152 |
+
},
|
| 153 |
+
{
|
| 154 |
+
id: "trustworthiness",
|
| 155 |
+
title: "Trustworthiness",
|
| 156 |
+
icon: <Shield size={18} />,
|
| 157 |
+
color: "bg-red-500",
|
| 158 |
+
description:
|
| 159 |
+
"Evaluates transparency, citations, acknowledgment of limitations, and overall user confidence in the model's responses.",
|
| 160 |
+
metrics: [
|
| 161 |
+
{
|
| 162 |
+
name: "Consistency",
|
| 163 |
+
description:
|
| 164 |
+
"How consistent were the model's responses across similar questions?",
|
| 165 |
+
},
|
| 166 |
+
{
|
| 167 |
+
name: "Confidence",
|
| 168 |
+
description:
|
| 169 |
+
"How confident were you in the accuracy of the model's information?",
|
| 170 |
+
},
|
| 171 |
+
{
|
| 172 |
+
name: "Transparency",
|
| 173 |
+
description:
|
| 174 |
+
"How transparent was the model about its limitations or uncertainties?",
|
| 175 |
+
},
|
| 176 |
+
],
|
| 177 |
+
},
|
| 178 |
+
{
|
| 179 |
+
id: "personality",
|
| 180 |
+
title: "Personality",
|
| 181 |
+
icon: <Smile size={18} />,
|
| 182 |
+
color: "bg-pink-500",
|
| 183 |
+
description:
|
| 184 |
+
"Assesses consistency and definition of the model's persona, and alignment with expectations of honesty, empathy, and fairness.",
|
| 185 |
+
metrics: [
|
| 186 |
+
{
|
| 187 |
+
name: "Personality Consistency",
|
| 188 |
+
description: "How consistent was the LLM's personality?",
|
| 189 |
+
},
|
| 190 |
+
{
|
| 191 |
+
name: "Distinct Personality",
|
| 192 |
+
description: "How well-defined was the LLM's personality?",
|
| 193 |
+
},
|
| 194 |
+
{
|
| 195 |
+
name: "Honesty Empathy Fairness",
|
| 196 |
+
description:
|
| 197 |
+
"How much did the LLM respond in a way that aligned with your expectations of honesty, empathy, or fairness?",
|
| 198 |
+
},
|
| 199 |
+
],
|
| 200 |
+
},
|
| 201 |
+
{
|
| 202 |
+
id: "background",
|
| 203 |
+
title: "Background and Culture",
|
| 204 |
+
icon: <Globe size={18} />,
|
| 205 |
+
color: "bg-teal-500",
|
| 206 |
+
description:
|
| 207 |
+
"Evaluates cultural sensitivity, alignment, relevance, and freedom from bias.",
|
| 208 |
+
metrics: [
|
| 209 |
+
{
|
| 210 |
+
name: "Ethical Alignment",
|
| 211 |
+
description:
|
| 212 |
+
"How aligned with your culture, viewpoint, or values was the LLM?",
|
| 213 |
+
},
|
| 214 |
+
{
|
| 215 |
+
name: "Cultural Awareness",
|
| 216 |
+
description:
|
| 217 |
+
"How well did the LLM recognize when your cultural perspective was relevant?",
|
| 218 |
+
},
|
| 219 |
+
{
|
| 220 |
+
name: "Bias and Stereotypes",
|
| 221 |
+
description:
|
| 222 |
+
"How free from stereotypes or bias was the LLM's response?",
|
| 223 |
+
},
|
| 224 |
+
],
|
| 225 |
+
},
|
| 226 |
+
];
|
| 227 |
+
|
| 228 |
+
// Section header component
|
| 229 |
+
const SectionHeader = ({ title, icon, section }) => (
|
| 230 |
+
<div
|
| 231 |
+
className="px-4 py-3 bg-gray-50 border-b flex justify-between items-center cursor-pointer"
|
| 232 |
+
onClick={() => toggleSection(section)}
|
| 233 |
+
>
|
| 234 |
+
<div className="flex items-center gap-2">
|
| 235 |
+
{icon}
|
| 236 |
+
<h3 className="font-semibold text-gray-800">{title}</h3>
|
| 237 |
+
</div>
|
| 238 |
+
{openSections[section] ? (
|
| 239 |
+
<ChevronUp size={16} />
|
| 240 |
+
) : (
|
| 241 |
+
<ChevronDown size={16} />
|
| 242 |
+
)}
|
| 243 |
+
</div>
|
| 244 |
+
);
|
| 245 |
+
|
| 246 |
+
return (
|
| 247 |
+
<div className="space-y-6">
|
| 248 |
+
{/* Introduction */}
|
| 249 |
+
<div className="border rounded-lg overflow-hidden shadow-sm">
|
| 250 |
+
<SectionHeader
|
| 251 |
+
title="About HUMAINE"
|
| 252 |
+
icon={<Info size={18} />}
|
| 253 |
+
section="introduction"
|
| 254 |
+
/>
|
| 255 |
+
{openSections.introduction && (
|
| 256 |
+
<div className="p-4 bg-gradient-to-r from-white to-blue-50">
|
| 257 |
+
<div className="flex flex-col md:flex-row gap-6">
|
| 258 |
+
<div className="md:w-2/3">
|
| 259 |
+
<p className="mb-4">
|
| 260 |
+
<strong>HUMAINE</strong> (Human Understanding and Measurement
|
| 261 |
+
of AI Natural Engagement) is an evaluation benchmark that
|
| 262 |
+
measures language model performance through actual user
|
| 263 |
+
experience. While many benchmarks focus on technical
|
| 264 |
+
capabilities, this evaluation captures how users perceive and
|
| 265 |
+
rate different LLMs across common, everyday use cases.
|
| 266 |
+
</p>
|
| 267 |
+
<p className="mb-4">
|
| 268 |
+
This study collected ratings from 514 participants
|
| 269 |
+
demographically representative of the US population. Each
|
| 270 |
+
participant completed real-world tasks with different LLMs and
|
| 271 |
+
provided structured feedback on various aspects of their
|
| 272 |
+
experience.
|
| 273 |
+
</p>
|
| 274 |
+
<p>
|
| 275 |
+
The evaluation framework includes 7 high-level categories and
|
| 276 |
+
21 specific low-level metrics that measure aspects like
|
| 277 |
+
helpfulness, communication quality, understanding,
|
| 278 |
+
adaptiveness, trustworthiness, personality, and cultural
|
| 279 |
+
awareness, alongside demographic equity analysis.
|
| 280 |
+
</p>
|
| 281 |
+
</div>
|
| 282 |
+
<div className="md:w-1/3 bg-white p-4 rounded-lg border shadow-sm">
|
| 283 |
+
<h4 className="font-medium text-gray-700 mb-2 border-b pb-1">
|
| 284 |
+
Tasks Evaluated
|
| 285 |
+
</h4>
|
| 286 |
+
<ul className="list-disc pl-5 space-y-2 text-sm">
|
| 287 |
+
{tasksUsed.map((task, index) => (
|
| 288 |
+
<li key={index} className="text-gray-700">
|
| 289 |
+
{task}
|
| 290 |
+
</li>
|
| 291 |
+
))}
|
| 292 |
+
</ul>
|
| 293 |
+
</div>
|
| 294 |
+
</div>
|
| 295 |
+
</div>
|
| 296 |
+
)}
|
| 297 |
+
</div>
|
| 298 |
+
|
| 299 |
+
{/* Methodology */}
|
| 300 |
+
<div className="border rounded-lg overflow-hidden shadow-sm">
|
| 301 |
+
<SectionHeader
|
| 302 |
+
title="Methodology"
|
| 303 |
+
icon={<Book size={18} />}
|
| 304 |
+
section="methodology"
|
| 305 |
+
/>
|
| 306 |
+
{openSections.methodology && (
|
| 307 |
+
<div className="p-4">
|
| 308 |
+
<div className="grid md:grid-cols-1 gap-4">
|
| 309 |
+
{/* Study Design */}
|
| 310 |
+
<div className="border rounded-lg p-4 bg-gray-50 hover:shadow-md transition-shadow">
|
| 311 |
+
<h4 className="text-lg font-medium mb-2 flex items-center gap-2 text-gray-800">
|
| 312 |
+
<span className="w-8 h-8 rounded-full bg-blue-500 flex items-center justify-center text-white">
|
| 313 |
+
1
|
| 314 |
+
</span>
|
| 315 |
+
Study Design
|
| 316 |
+
</h4>
|
| 317 |
+
<ul className="list-disc pl-5 space-y-1 text-sm">
|
| 318 |
+
<li>
|
| 319 |
+
<strong>Participants:</strong> 514 individuals representing
|
| 320 |
+
US demographics (stratified by age, sex, ethnicity,
|
| 321 |
+
political affiliation).
|
| 322 |
+
</li>
|
| 323 |
+
<li>
|
| 324 |
+
<strong>Task Design:</strong> Six everyday tasks spanning
|
| 325 |
+
creative, practical, and analytical use cases.
|
| 326 |
+
</li>
|
| 327 |
+
<li>
|
| 328 |
+
<strong>Process:</strong> Each participant completed all six
|
| 329 |
+
tasks, each with a different LLM. The assignment of tasks to
|
| 330 |
+
models and the order of tasks were fully randomized.
|
| 331 |
+
</li>
|
| 332 |
+
<li>
|
| 333 |
+
<strong>Models Evaluated:</strong> Latest o1, GPT-4o, Claude
|
| 334 |
+
3.7 (extended thinking), Gemini 2 Flash, Llama 3.1 405B,
|
| 335 |
+
DeepSeek R1.
|
| 336 |
+
</li>
|
| 337 |
+
<li>
|
| 338 |
+
<strong>Model Access:</strong> All models were accessed via
|
| 339 |
+
openrouter.ai with temperature=1, min_tokens=50,
|
| 340 |
+
max_tokens=5,000.
|
| 341 |
+
</li>
|
| 342 |
+
<li>
|
| 343 |
+
<strong>Conversations:</strong> Participants were required
|
| 344 |
+
to exchange at least 4 messages with the models and they
|
| 345 |
+
could exchange more if they wished (not capped).
|
| 346 |
+
</li>
|
| 347 |
+
</ul>
|
| 348 |
+
</div>
|
| 349 |
+
{/* Evaluation Framework */}
|
| 350 |
+
<div className="border rounded-lg p-4 bg-gray-50 hover:shadow-md transition-shadow">
|
| 351 |
+
<h4 className="text-lg font-medium mb-2 flex items-center gap-2 text-gray-800">
|
| 352 |
+
<span className="w-8 h-8 rounded-full bg-blue-500 flex items-center justify-center text-white">
|
| 353 |
+
2
|
| 354 |
+
</span>
|
| 355 |
+
Evaluation Framework
|
| 356 |
+
</h4>
|
| 357 |
+
<p className="mb-2 text-sm">
|
| 358 |
+
Our approach captures multiple aspects of user experience:
|
| 359 |
+
</p>
|
| 360 |
+
<ul className="list-disc pl-5 space-y-1 text-sm">
|
| 361 |
+
<li>
|
| 362 |
+
<strong>Multi-Dimensional Metrics:</strong> Performance is
|
| 363 |
+
evaluated across 7 high-level categories (rated 1-7) and 21
|
| 364 |
+
specific low-level metrics (rated 1-5).
|
| 365 |
+
</li>
|
| 366 |
+
<li>
|
| 367 |
+
<strong>Demographic Analysis:</strong> We assess performance
|
| 368 |
+
consistency across different demographic groups through
|
| 369 |
+
equity assessment.
|
| 370 |
+
</li>
|
| 371 |
+
<li>
|
| 372 |
+
<strong>Scale Normalization:</strong> All ratings are
|
| 373 |
+
converted to a 0-100 scale for easier comparison.
|
| 374 |
+
</li>
|
| 375 |
+
</ul>
|
| 376 |
+
</div>
|
| 377 |
+
|
| 378 |
+
{/* Data Analysis & Weighting */}
|
| 379 |
+
<div className="border rounded-lg p-4 bg-gray-50 hover:shadow-md transition-shadow">
|
| 380 |
+
<h4 className="text-lg font-medium mb-2 flex items-center gap-2 text-gray-800">
|
| 381 |
+
<span className="w-8 h-8 rounded-full bg-blue-500 flex items-center justify-center text-white">
|
| 382 |
+
3
|
| 383 |
+
</span>
|
| 384 |
+
Data Analysis & Weighting
|
| 385 |
+
</h4>
|
| 386 |
+
<ul className="list-disc pl-5 space-y-1 text-sm">
|
| 387 |
+
<li>
|
| 388 |
+
<strong>MRP Methodology:</strong> Data is processed through
|
| 389 |
+
multilevel regression with poststratification to create
|
| 390 |
+
results weighted to be highly representative of the US
|
| 391 |
+
population.
|
| 392 |
+
</li>
|
| 393 |
+
<li>
|
| 394 |
+
<strong>Robust Estimation:</strong> All model estimations
|
| 395 |
+
were parametrically bootstrapped (N = 1000) to ensure that
|
| 396 |
+
any uncertainty in the estimates was accounted for.
|
| 397 |
+
</li>
|
| 398 |
+
<li>
|
| 399 |
+
<strong>National Level Comparisons:</strong> For the Overall
|
| 400 |
+
Rankings and Metrics Breakdown tabs, we use the
|
| 401 |
+
national-level estimates derived from MRP.
|
| 402 |
+
</li>
|
| 403 |
+
<li>
|
| 404 |
+
<strong>Task-Level Comparisons:</strong> For task-specific
|
| 405 |
+
comparisons (Task Performance tab), we use the raw
|
| 406 |
+
(unweighted) data due to sample size constraints.
|
| 407 |
+
</li>
|
| 408 |
+
</ul>
|
| 409 |
+
</div>
|
| 410 |
+
|
| 411 |
+
{/* Demographic Equity Assessment */}
|
| 412 |
+
<div className="border rounded-lg p-4 bg-gray-50 hover:shadow-md transition-shadow">
|
| 413 |
+
<h4 className="text-lg font-medium mb-2 flex items-center gap-2 text-gray-800">
|
| 414 |
+
<span className="w-8 h-8 rounded-full bg-blue-500 flex items-center justify-center text-white">
|
| 415 |
+
4
|
| 416 |
+
</span>
|
| 417 |
+
Demographic Equity Assessment
|
| 418 |
+
</h4>
|
| 419 |
+
<p className="mb-2 text-sm">
|
| 420 |
+
The equity assessment evaluates performance consistency across
|
| 421 |
+
demographic groups using a standardized approach:
|
| 422 |
+
</p>
|
| 423 |
+
<div className="bg-white rounded p-3 border mb-2">
|
| 424 |
+
<p className="text-xs mb-2">
|
| 425 |
+
The <strong>Equity Gap</strong> is the score difference
|
| 426 |
+
between the highest and lowest scoring demographic groups
|
| 427 |
+
for a specific metric. For example, if a model scores 85
|
| 428 |
+
with users age 18-29 but 65 with users age 60+ on
|
| 429 |
+
helpfulness, the equity gap would be 20 points.
|
| 430 |
+
</p>
|
| 431 |
+
<p className="text-xs mb-2">
|
| 432 |
+
We evaluate equity gaps using both{" "}
|
| 433 |
+
<strong>Effect Size</strong> and{" "}
|
| 434 |
+
<strong>Statistical Significance</strong> to identify
|
| 435 |
+
meaningful performance differences:
|
| 436 |
+
</p>
|
| 437 |
+
<div className="text-xs mt-2 space-y-2">
|
| 438 |
+
<div>
|
| 439 |
+
<p className="font-medium text-gray-700">
|
| 440 |
+
Effect Size Calculation:
|
| 441 |
+
</p>
|
| 442 |
+
<p className="text-gray-600 ml-2">
|
| 443 |
+
We normalize each gap by dividing it by the category's
|
| 444 |
+
standard deviation:
|
| 445 |
+
<br />
|
| 446 |
+
<span className="font-mono bg-gray-100 px-1">
|
| 447 |
+
Effect Size = (Max Score - Min Score) / Category
|
| 448 |
+
Standard Deviation
|
| 449 |
+
</span>
|
| 450 |
+
</p>
|
| 451 |
+
<p className="text-gray-600 ml-2 mt-1">
|
| 452 |
+
Category Standard Deviation is calculated from all
|
| 453 |
+
demographic MRP scores within that specific category.
|
| 454 |
+
</p>
|
| 455 |
+
</div>
|
| 456 |
+
|
| 457 |
+
<div>
|
| 458 |
+
<p className="font-medium text-gray-700">
|
| 459 |
+
Effect Size Classification:
|
| 460 |
+
</p>
|
| 461 |
+
<div className="grid grid-cols-2 gap-x-3 gap-y-2 mt-1">
|
| 462 |
+
<div className="flex items-center gap-1">
|
| 463 |
+
<div className="w-3 h-3 rounded-full bg-red-100"></div>
|
| 464 |
+
<div>
|
| 465 |
+
<span className="font-medium text-gray-700">
|
| 466 |
+
Large
|
| 467 |
+
</span>
|
| 468 |
+
<p className="text-gray-500">Effect Size ≥ 0.8</p>
|
| 469 |
+
</div>
|
| 470 |
+
</div>
|
| 471 |
+
<div className="flex items-center gap-1">
|
| 472 |
+
<div className="w-3 h-3 rounded-full bg-yellow-100"></div>
|
| 473 |
+
<div>
|
| 474 |
+
<span className="font-medium text-gray-700">
|
| 475 |
+
Medium
|
| 476 |
+
</span>
|
| 477 |
+
<p className="text-gray-500">Effect Size 0.5-0.8</p>
|
| 478 |
+
</div>
|
| 479 |
+
</div>
|
| 480 |
+
<div className="flex items-center gap-1">
|
| 481 |
+
<div className="w-3 h-3 rounded-full bg-blue-100"></div>
|
| 482 |
+
<div>
|
| 483 |
+
<span className="font-medium text-gray-700">
|
| 484 |
+
Small
|
| 485 |
+
</span>
|
| 486 |
+
<p className="text-gray-500">Effect Size 0.2-0.5</p>
|
| 487 |
+
</div>
|
| 488 |
+
</div>
|
| 489 |
+
<div className="flex items-center gap-1">
|
| 490 |
+
<div className="w-3 h-3 rounded-full bg-green-100"></div>
|
| 491 |
+
<div>
|
| 492 |
+
<span className="font-medium text-gray-700">
|
| 493 |
+
Negligible
|
| 494 |
+
</span>
|
| 495 |
+
<p className="text-gray-500">
|
| 496 |
+
Effect Size {"<"} 0.2
|
| 497 |
+
</p>
|
| 498 |
+
</div>
|
| 499 |
+
</div>
|
| 500 |
+
</div>
|
| 501 |
+
</div>
|
| 502 |
+
|
| 503 |
+
<div>
|
| 504 |
+
<p className="font-medium text-gray-700">
|
| 505 |
+
Statistical Significance:
|
| 506 |
+
</p>
|
| 507 |
+
<p className="text-gray-600 ml-2">
|
| 508 |
+
We use p-values to determine if gaps are statistically
|
| 509 |
+
significant (p {"<"} 0.05). To account for the large
|
| 510 |
+
number of tests performed, p-values were adjusted using
|
| 511 |
+
the Benjamini-Hochberg (FDR) method. Significance
|
| 512 |
+
reported reflects this correction (q {"<"} 0.05).
|
| 513 |
+
</p>
|
| 514 |
+
</div>
|
| 515 |
+
|
| 516 |
+
<div>
|
| 517 |
+
<p className="font-medium text-gray-700">
|
| 518 |
+
Equity Concerns:
|
| 519 |
+
</p>
|
| 520 |
+
<p className="text-gray-600 ml-2">
|
| 521 |
+
A gap is flagged as an equity concern when it has both:
|
| 522 |
+
<br />
|
| 523 |
+
1. Large Effect Size (≥ 0.8)
|
| 524 |
+
<br />
|
| 525 |
+
2. Statistical Significance (p {"<"} 0.05)
|
| 526 |
+
</p>
|
| 527 |
+
</div>
|
| 528 |
+
</div>
|
| 529 |
+
<p className="text-xs text-gray-600 mt-2">
|
| 530 |
+
<strong>Note:</strong> This methodology allows us to
|
| 531 |
+
identify meaningful performance differences across
|
| 532 |
+
demographic groups while accounting for both the magnitude
|
| 533 |
+
of the gap (effect size) and its statistical reliability
|
| 534 |
+
(significance).
|
| 535 |
+
</p>
|
| 536 |
+
</div>
|
| 537 |
+
</div>
|
| 538 |
+
</div>
|
| 539 |
+
</div>
|
| 540 |
+
)}
|
| 541 |
+
</div>
|
| 542 |
+
|
| 543 |
+
{/* Metrics Calculation */}
|
| 544 |
+
<div className="border rounded-lg overflow-hidden shadow-sm">
|
| 545 |
+
<SectionHeader
|
| 546 |
+
title="Metrics Calculation"
|
| 547 |
+
icon={<Calculator size={18} />}
|
| 548 |
+
section="metricsCalculation"
|
| 549 |
+
/>
|
| 550 |
+
{openSections.metricsCalculation && (
|
| 551 |
+
<div className="p-4">
|
| 552 |
+
<p className="text-sm mb-4">
|
| 553 |
+
This section explains how the metrics in the Overview page's
|
| 554 |
+
ranking table are calculated.
|
| 555 |
+
</p>
|
| 556 |
+
|
| 557 |
+
<div className="grid md:grid-cols-2 lg:grid-cols-3 gap-3">
|
| 558 |
+
<div className="border rounded p-3 hover:shadow-md transition-shadow">
|
| 559 |
+
<h4 className="text-sm font-medium text-gray-800 mb-1 flex items-center gap-1">
|
| 560 |
+
<div className="w-4 h-4 rounded-full bg-blue-500"></div>
|
| 561 |
+
Overall Score
|
| 562 |
+
</h4>
|
| 563 |
+
<p className="text-xs text-gray-600">
|
| 564 |
+
Average score across high-level categories at the national
|
| 565 |
+
level (0-100). This represents overall model performance
|
| 566 |
+
across all evaluation dimensions.
|
| 567 |
+
</p>
|
| 568 |
+
</div>
|
| 569 |
+
|
| 570 |
+
<div className="border rounded p-3 hover:shadow-md transition-shadow">
|
| 571 |
+
<h4 className="text-sm font-medium text-gray-800 mb-1 flex items-center gap-1">
|
| 572 |
+
<div className="w-4 h-4 rounded-full bg-blue-500"></div>
|
| 573 |
+
Overall SD
|
| 574 |
+
</h4>
|
| 575 |
+
<p className="text-xs text-gray-600">
|
| 576 |
+
Standard Deviation across high-level categories (lower = more
|
| 577 |
+
consistent). Measures how consistently a model performs across
|
| 578 |
+
different capability areas.
|
| 579 |
+
</p>
|
| 580 |
+
</div>
|
| 581 |
+
|
| 582 |
+
<div className="border rounded p-3 hover:shadow-md transition-shadow">
|
| 583 |
+
<h4 className="text-sm font-medium text-gray-800 mb-1 flex items-center gap-1">
|
| 584 |
+
<div className="w-4 h-4 rounded-full bg-blue-500"></div>
|
| 585 |
+
Max Equity Gap
|
| 586 |
+
</h4>
|
| 587 |
+
<p className="text-xs text-gray-600">
|
| 588 |
+
Largest demographic score difference (hover for details).
|
| 589 |
+
Shows the maximum difference in scores between any two
|
| 590 |
+
demographic groups, with indicators for effect size and
|
| 591 |
+
statistical significance.
|
| 592 |
+
</p>
|
| 593 |
+
</div>
|
| 594 |
+
|
| 595 |
+
<div className="border rounded p-3 hover:shadow-md transition-shadow">
|
| 596 |
+
<h4 className="text-sm font-medium text-gray-800 mb-1 flex items-center gap-1">
|
| 597 |
+
<div className="w-4 h-4 rounded-full bg-blue-500"></div>
|
| 598 |
+
Max Gap Area
|
| 599 |
+
</h4>
|
| 600 |
+
<p className="text-xs text-gray-600">
|
| 601 |
+
Factor and Category where the Max Equity Gap occurs.
|
| 602 |
+
Identifies which demographic factor (e.g., Age, Gender) and
|
| 603 |
+
which category (e.g., Helpfulness, Understanding) shows the
|
| 604 |
+
largest performance difference.
|
| 605 |
+
</p>
|
| 606 |
+
</div>
|
| 607 |
+
|
| 608 |
+
<div className="border rounded p-3 hover:shadow-md transition-shadow">
|
| 609 |
+
<h4 className="text-sm font-medium text-gray-800 mb-1 flex items-center gap-1">
|
| 610 |
+
<div className="w-4 h-4 rounded-full bg-blue-500"></div>
|
| 611 |
+
Equity Concerns
|
| 612 |
+
</h4>
|
| 613 |
+
<p className="text-xs text-gray-600">
|
| 614 |
+
Percentage of demographic gaps flagged as equity concerns
|
| 615 |
+
(lower is better). An equity concern is defined as a gap with
|
| 616 |
+
both large effect size (≥0.8) and statistical significance.
|
| 617 |
+
</p>
|
| 618 |
+
</div>
|
| 619 |
+
|
| 620 |
+
<div className="border rounded p-3 hover:shadow-md transition-shadow">
|
| 621 |
+
<h4 className="text-sm font-medium text-gray-800 mb-1 flex items-center gap-1">
|
| 622 |
+
<div className="w-4 h-4 rounded-full bg-blue-500"></div>
|
| 623 |
+
User Retention
|
| 624 |
+
</h4>
|
| 625 |
+
<p className="text-xs text-gray-600">
|
| 626 |
+
Percentage of participants who said they would use the model
|
| 627 |
+
again. This is based on the "Repeat Usage" question and
|
| 628 |
+
indicates user satisfaction and likelihood to continue using
|
| 629 |
+
the model.
|
| 630 |
+
</p>
|
| 631 |
+
</div>
|
| 632 |
+
</div>
|
| 633 |
+
|
| 634 |
+
<div className="mt-4 bg-blue-50 border-l-4 border-blue-400 p-3 rounded">
|
| 635 |
+
<p className="text-xs text-blue-800">
|
| 636 |
+
<strong>Note:</strong> All scores shown in the dashboard are
|
| 637 |
+
based on MRP-adjusted (Multilevel Regression with
|
| 638 |
+
Poststratification) estimates to ensure they are representative
|
| 639 |
+
of the US population. The only exception is the Task Performance
|
| 640 |
+
tab, which uses raw scores due to sample size constraints at the
|
| 641 |
+
task level.
|
| 642 |
+
</p>
|
| 643 |
+
</div>
|
| 644 |
+
</div>
|
| 645 |
+
)}
|
| 646 |
+
</div>
|
| 647 |
+
|
| 648 |
+
{/* Metrics Explained */}
|
| 649 |
+
<div className="border rounded-lg overflow-hidden shadow-sm">
|
| 650 |
+
<SectionHeader
|
| 651 |
+
title="Metrics Explained"
|
| 652 |
+
icon={<BarChart size={18} />}
|
| 653 |
+
section="metricsExplained"
|
| 654 |
+
/>
|
| 655 |
+
{openSections.metricsExplained && (
|
| 656 |
+
<div className="p-4">
|
| 657 |
+
<p className="mb-4 text-sm">
|
| 658 |
+
Our evaluation uses 7 high-level categories (rated on a 1-7 Likert
|
| 659 |
+
scale) and 21 low-level metrics (rated on a 1-5 scale) to
|
| 660 |
+
comprehensively assess LLM performance from a user experience
|
| 661 |
+
perspective.
|
| 662 |
+
</p>
|
| 663 |
+
|
| 664 |
+
{/* Metric selector tabs */}
|
| 665 |
+
<div className="flex flex-wrap gap-1 mb-4 border-b">
|
| 666 |
+
{metricsData.map((metric) => (
|
| 667 |
+
<button
|
| 668 |
+
key={metric.id}
|
| 669 |
+
className={`px-3 py-2 text-sm rounded-t-lg flex items-center gap-1 ${
|
| 670 |
+
activeMetricTab === metric.id
|
| 671 |
+
? "bg-gray-100 font-medium border-t border-l border-r"
|
| 672 |
+
: "bg-white hover:bg-gray-50"
|
| 673 |
+
}`}
|
| 674 |
+
onClick={() => setActiveMetricTab(metric.id)}
|
| 675 |
+
>
|
| 676 |
+
<span
|
| 677 |
+
className={`w-2 h-2 rounded-full ${metric.color}`}
|
| 678 |
+
></span>
|
| 679 |
+
{metric.title}
|
| 680 |
+
</button>
|
| 681 |
+
))}
|
| 682 |
+
</div>
|
| 683 |
+
|
| 684 |
+
{/* Active metric content */}
|
| 685 |
+
{metricsData.map(
|
| 686 |
+
(metric) =>
|
| 687 |
+
activeMetricTab === metric.id && (
|
| 688 |
+
<div
|
| 689 |
+
key={metric.id}
|
| 690 |
+
className="border rounded-lg overflow-hidden"
|
| 691 |
+
>
|
| 692 |
+
<div className="px-4 py-3 bg-gray-50 border-b flex items-center gap-2">
|
| 693 |
+
<div className={`rounded-full`}>
|
| 694 |
+
{React.cloneElement(metric.icon, {
|
| 695 |
+
className: `text-gray-700 w-5 h-5`,
|
| 696 |
+
})}
|
| 697 |
+
</div>
|
| 698 |
+
<h4 className="font-medium text-gray-800">
|
| 699 |
+
{metric.title}{" "}
|
| 700 |
+
<span className="text-sm font-normal text-gray-600">
|
| 701 |
+
(1-7 scale)
|
| 702 |
+
</span>
|
| 703 |
+
</h4>
|
| 704 |
+
</div>
|
| 705 |
+
<div className="p-4">
|
| 706 |
+
<p className="text-sm mb-4">{metric.description}</p>
|
| 707 |
+
|
| 708 |
+
{metric.metrics.length > 0 && (
|
| 709 |
+
<>
|
| 710 |
+
<h5 className="text-sm font-medium mb-3 text-gray-700">
|
| 711 |
+
Specific Metrics (1-5 scale)
|
| 712 |
+
</h5>
|
| 713 |
+
<div className="grid md:grid-cols-3 gap-3">
|
| 714 |
+
{metric.metrics.map((subMetric, idx) => (
|
| 715 |
+
<div
|
| 716 |
+
key={idx}
|
| 717 |
+
className="border rounded p-3 hover:shadow-sm transition-shadow"
|
| 718 |
+
>
|
| 719 |
+
<p className="text-sm font-medium">
|
| 720 |
+
{subMetric.name}
|
| 721 |
+
</p>
|
| 722 |
+
<p className="text-xs text-gray-600 mt-1">
|
| 723 |
+
{subMetric.description}
|
| 724 |
+
</p>
|
| 725 |
+
</div>
|
| 726 |
+
))}
|
| 727 |
+
</div>
|
| 728 |
+
</>
|
| 729 |
+
)}
|
| 730 |
+
</div>
|
| 731 |
+
</div>
|
| 732 |
+
)
|
| 733 |
+
)}
|
| 734 |
+
</div>
|
| 735 |
+
)}
|
| 736 |
+
</div>
|
| 737 |
+
</div>
|
| 738 |
+
);
|
| 739 |
+
};
|
| 740 |
+
|
| 741 |
+
export default AboutTab;
|
leaderboard-app/components/DemographicAnalysis.jsx
ADDED
|
@@ -0,0 +1,925 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// components/DemographicAnalysis.jsx - Complete Updated File
|
| 2 |
+
|
| 3 |
+
"use client";
|
| 4 |
+
|
| 5 |
+
import React, { useState, useMemo, useEffect, useRef } from "react";
|
| 6 |
+
import {
|
| 7 |
+
BarChart,
|
| 8 |
+
Bar,
|
| 9 |
+
XAxis,
|
| 10 |
+
YAxis,
|
| 11 |
+
CartesianGrid,
|
| 12 |
+
Tooltip as RechartsTooltip,
|
| 13 |
+
Legend,
|
| 14 |
+
ResponsiveContainer,
|
| 15 |
+
Cell,
|
| 16 |
+
LabelList,
|
| 17 |
+
} from "recharts";
|
| 18 |
+
import {
|
| 19 |
+
getSignificanceIndicator,
|
| 20 |
+
formatDisplayKey,
|
| 21 |
+
getMetricTooltip,
|
| 22 |
+
} from "../lib/utils"; // Adjust path as needed
|
| 23 |
+
import { Tooltip } from "./Tooltip"; // Your custom Tooltip component
|
| 24 |
+
|
| 25 |
+
// Helper component for info tooltips with fixed positioning
|
| 26 |
+
const InfoTooltip = ({ text }) => {
|
| 27 |
+
const [isVisible, setIsVisible] = useState(false);
|
| 28 |
+
const [position, setPosition] = useState({ top: 0, left: 0 });
|
| 29 |
+
const buttonRef = useRef(null);
|
| 30 |
+
|
| 31 |
+
// Update position when tooltip becomes visible
|
| 32 |
+
useEffect(() => {
|
| 33 |
+
if (isVisible && buttonRef.current) {
|
| 34 |
+
const rect = buttonRef.current.getBoundingClientRect();
|
| 35 |
+
setPosition({
|
| 36 |
+
top: rect.top - 10, // Position above the icon with a small gap
|
| 37 |
+
left: rect.left + 12, // Center with the icon
|
| 38 |
+
});
|
| 39 |
+
}
|
| 40 |
+
}, [isVisible]);
|
| 41 |
+
|
| 42 |
+
return (
|
| 43 |
+
<div className="relative inline-block ml-1 align-middle">
|
| 44 |
+
<button
|
| 45 |
+
ref={buttonRef}
|
| 46 |
+
className="text-gray-400 hover:text-gray-600 focus:outline-none"
|
| 47 |
+
onMouseEnter={() => setIsVisible(true)}
|
| 48 |
+
onMouseLeave={() => setIsVisible(false)}
|
| 49 |
+
onClick={(e) => {
|
| 50 |
+
e.stopPropagation();
|
| 51 |
+
setIsVisible(!isVisible);
|
| 52 |
+
}}
|
| 53 |
+
aria-label="Info"
|
| 54 |
+
>
|
| 55 |
+
<svg
|
| 56 |
+
xmlns="http://www.w3.org/2000/svg"
|
| 57 |
+
className="h-4 w-4"
|
| 58 |
+
viewBox="0 0 20 20"
|
| 59 |
+
fill="currentColor"
|
| 60 |
+
>
|
| 61 |
+
<path
|
| 62 |
+
fillRule="evenodd"
|
| 63 |
+
d="M18 10a8 8 0 11-16 0 8 8 0 0116 0zm-7-4a1 1 0 11-2 0 1 1 0 012 0zM9 9a1 1 0 000 2v3a1 1 0 001 1h1a1 1 0 100-2v-3a1 1 0 00-1-1H9z"
|
| 64 |
+
clipRule="evenodd"
|
| 65 |
+
/>
|
| 66 |
+
</svg>
|
| 67 |
+
</button>
|
| 68 |
+
{isVisible && (
|
| 69 |
+
<div
|
| 70 |
+
className="fixed p-2 bg-white border-1 rounded shadow-xl text-xs text-gray-700 whitespace-pre-wrap"
|
| 71 |
+
style={{
|
| 72 |
+
top: `${position.top}px`,
|
| 73 |
+
left: `${position.left}px`,
|
| 74 |
+
zIndex: 9999,
|
| 75 |
+
maxWidth: "250px",
|
| 76 |
+
transform: "translate(-50%, -100%)",
|
| 77 |
+
}}
|
| 78 |
+
>
|
| 79 |
+
{text}
|
| 80 |
+
</div>
|
| 81 |
+
)}
|
| 82 |
+
</div>
|
| 83 |
+
);
|
| 84 |
+
};
|
| 85 |
+
|
| 86 |
+
// Custom tooltip for DEMOGRAPHIC chart (shows scores per model for a level)
|
| 87 |
+
const CustomDemographicTooltip = ({ active, payload, label }) => {
|
| 88 |
+
if (active && payload && payload.length) {
|
| 89 |
+
const sortedPayload = [...payload].sort(
|
| 90 |
+
(a, b) => (b.value || 0) - (a.value || 0)
|
| 91 |
+
);
|
| 92 |
+
return (
|
| 93 |
+
<div className="bg-white p-3 border rounded shadow-lg max-w-xs">
|
| 94 |
+
<p className="font-medium text-sm mb-1">{label}</p>
|
| 95 |
+
{sortedPayload.map((entry, index) => (
|
| 96 |
+
<div key={`item-${index}`} className="flex items-center mt-1">
|
| 97 |
+
<div
|
| 98 |
+
className="w-3 h-3 mr-2 rounded-full flex-shrink-0"
|
| 99 |
+
style={{
|
| 100 |
+
backgroundColor:
|
| 101 |
+
entry.payload[`${entry.dataKey}_color`] ||
|
| 102 |
+
entry.color ||
|
| 103 |
+
"#999",
|
| 104 |
+
}}
|
| 105 |
+
></div>
|
| 106 |
+
<span className="text-xs flex-grow pr-2">{entry.name}: </span>
|
| 107 |
+
<span className="text-xs font-medium ml-1 whitespace-nowrap">
|
| 108 |
+
{typeof entry.value === "number" ? entry.value.toFixed(1) : "N/A"}
|
| 109 |
+
</span>
|
| 110 |
+
</div>
|
| 111 |
+
))}
|
| 112 |
+
</div>
|
| 113 |
+
);
|
| 114 |
+
}
|
| 115 |
+
return null;
|
| 116 |
+
};
|
| 117 |
+
|
| 118 |
+
// Custom tooltip for EQUITY GAP chart - UPDATED
|
| 119 |
+
const EquityGapTooltip = ({ active, payload }) => {
|
| 120 |
+
if (active && payload && payload.length > 0) {
|
| 121 |
+
const data = payload[0].payload; // data here IS an item from equityGapChartData (derived from all_equity_gaps)
|
| 122 |
+
|
| 123 |
+
if (!data || typeof data !== "object") return null;
|
| 124 |
+
|
| 125 |
+
// Get significance indicator parts
|
| 126 |
+
const significanceInfo = getSignificanceIndicator(
|
| 127 |
+
data.is_statistically_significant,
|
| 128 |
+
data.p_value
|
| 129 |
+
);
|
| 130 |
+
const ciLower = data.gap_confidence_interval_95_lower;
|
| 131 |
+
const ciUpper = data.gap_confidence_interval_95_upper;
|
| 132 |
+
|
| 133 |
+
return (
|
| 134 |
+
<div className="bg-white p-3 border rounded shadow-lg text-xs max-w-xs">
|
| 135 |
+
<p className="font-medium text-sm mb-2">{data.model}</p>
|
| 136 |
+
<div className="space-y-1">
|
| 137 |
+
<div className="flex justify-between">
|
| 138 |
+
<span className="font-semibold">Equity Gap:</span>
|
| 139 |
+
{/* 'gap' key is used in chart data */}
|
| 140 |
+
<span>{data.gap?.toFixed(1) ?? "N/A"} pts</span>
|
| 141 |
+
</div>
|
| 142 |
+
{data.effect_size !== undefined && data.effect_size !== null && (
|
| 143 |
+
<div className="flex justify-between">
|
| 144 |
+
<span className="font-semibold">Effect Size:</span>
|
| 145 |
+
<span>
|
| 146 |
+
{data.effect_size?.toFixed(2) ?? "N/A"} (
|
| 147 |
+
{data.effect_size_class || "N/A"})
|
| 148 |
+
</span>
|
| 149 |
+
</div>
|
| 150 |
+
)}
|
| 151 |
+
{/* Show Significance */}
|
| 152 |
+
<div className="flex justify-between items-center">
|
| 153 |
+
<span className="font-semibold">Significance:</span>
|
| 154 |
+
<span className={`flex items-center ${significanceInfo.className}`}>
|
| 155 |
+
{significanceInfo.tooltip.replace(/Statistically /g, "")}{" "}
|
| 156 |
+
{/* Shorten text */}
|
| 157 |
+
<span className="ml-1 font-bold">{significanceInfo.symbol}</span>
|
| 158 |
+
</span>
|
| 159 |
+
</div>
|
| 160 |
+
{/* Show Confidence Interval */}
|
| 161 |
+
<div className="flex justify-between">
|
| 162 |
+
<span className="font-semibold">95% CI:</span>
|
| 163 |
+
<span>
|
| 164 |
+
{typeof ciLower === "number" && typeof ciUpper === "number"
|
| 165 |
+
? `[${ciLower.toFixed(1)}, ${ciUpper.toFixed(1)}]`
|
| 166 |
+
: "N/A"}
|
| 167 |
+
</span>
|
| 168 |
+
</div>
|
| 169 |
+
{/* Show Concern Flag */}
|
| 170 |
+
{data.is_equity_concern !== undefined && (
|
| 171 |
+
<div className="flex justify-between">
|
| 172 |
+
<span className="font-semibold">Concern Flag:</span>
|
| 173 |
+
<span
|
| 174 |
+
className={
|
| 175 |
+
data.is_equity_concern
|
| 176 |
+
? "font-bold text-red-600"
|
| 177 |
+
: "text-gray-600"
|
| 178 |
+
}
|
| 179 |
+
>
|
| 180 |
+
{data.is_equity_concern ? "Yes" : "No"}
|
| 181 |
+
</span>
|
| 182 |
+
</div>
|
| 183 |
+
)}
|
| 184 |
+
{/* Show Min/Max Groups */}
|
| 185 |
+
<div className="flex justify-between">
|
| 186 |
+
<span className="font-semibold">Lowest Group:</span>
|
| 187 |
+
<span>
|
| 188 |
+
{data.min_level || "N/A"} ({data.min_score?.toFixed(1) ?? "-"})
|
| 189 |
+
</span>
|
| 190 |
+
</div>
|
| 191 |
+
<div className="flex justify-between">
|
| 192 |
+
<span className="font-semibold">Highest Group:</span>
|
| 193 |
+
<span>
|
| 194 |
+
{data.max_level || "N/A"} ({data.max_score?.toFixed(1) ?? "-"})
|
| 195 |
+
</span>
|
| 196 |
+
</div>
|
| 197 |
+
</div>
|
| 198 |
+
</div>
|
| 199 |
+
);
|
| 200 |
+
}
|
| 201 |
+
return null;
|
| 202 |
+
};
|
| 203 |
+
|
| 204 |
+
// New helper functions for styling consistency
|
| 205 |
+
|
| 206 |
+
// Badge classes keyed by effect-size label. A Map is used so that only these
// exact labels match (no inherited object properties).
const EFFECT_SIZE_BADGE_STYLES = new Map([
  ["Large", "bg-red-100 text-red-800"],
  ["Medium", "bg-yellow-100 text-yellow-800"],
  ["Small", "bg-blue-100 text-blue-800"],
  ["Negligible", "bg-green-100 text-green-800"],
]);

/**
 * Returns the badge classes for an effect-size label.
 *
 * @param {string} effectSizeClass - "Large", "Medium", "Small" or "Negligible"
 *   (matched exactly, case-sensitive).
 * @returns {string} Background/text classes; a neutral grey pair for any other value.
 */
const getEffectSizeBadgeStyle = (effectSizeClass) =>
  EFFECT_SIZE_BADGE_STYLES.get(effectSizeClass) ?? "bg-gray-100 text-gray-800";
|
| 221 |
+
|
| 222 |
+
/**
 * Returns the badge classes for a statistical-significance flag.
 *
 * @param {boolean|null|undefined} isSignificant - The flag; null/undefined
 *   means the result is unknown.
 * @returns {string} Grey (dark text) when unknown, blue when truthy, grey
 *   (lighter text) otherwise.
 */
const getSignificanceBadgeStyle = (isSignificant) => {
  // `== null` matches exactly null and undefined.
  if (isSignificant == null) {
    return "bg-gray-100 text-gray-800";
  }
  if (isSignificant) {
    return "bg-blue-100 text-blue-800";
  }
  return "bg-gray-100 text-gray-600";
};
|
| 230 |
+
|
| 231 |
+
/**
 * Returns the badge classes for an equity-concern flag.
 *
 * @param {boolean|null|undefined} isConcern - The flag; null/undefined means
 *   the result is unknown.
 * @returns {string} Grey when unknown, red when truthy, green otherwise.
 */
const getConcernBadgeStyle = (isConcern) => {
  // `== null` matches exactly null and undefined.
  if (isConcern == null) {
    return "bg-gray-100 text-gray-800";
  }
  if (isConcern) {
    return "bg-red-100 text-red-800";
  }
  return "bg-green-100 text-green-800";
};
|
| 237 |
+
|
| 238 |
+
/**
 * Formats a p-value for display, e.g. "p=0.030 < 0.05" or "p=0.200 ≥ 0.05".
 *
 * @param {number|null|undefined} pValue - The p-value to format.
 * @returns {string} The formatted text, or "N/A" when the value is missing or
 *   is not a finite number (so bad data cannot throw during render or print
 *   "p=NaN").
 */
const formatPValue = (pValue) => {
  if (typeof pValue !== "number" || !Number.isFinite(pValue)) return "N/A";
  const comparison = pValue < 0.05 ? "< 0.05" : "≥ 0.05";
  return `p=${pValue.toFixed(3)} ${comparison}`;
};
|
| 243 |
+
|
| 244 |
+
/**
 * Builds the multi-line explanatory text shown next to an effect-size badge.
 *
 * @param {number|null|undefined} effectSize - The effect size to display.
 * @returns {string} The value (two decimals, or "N/A" when it is not a finite
 *   number) followed by the calculation and threshold description.
 */
const getEffectSizeTooltip = (effectSize) => {
  // Guard so a missing or non-numeric value cannot throw on `.toFixed`.
  const formattedEffectSize =
    typeof effectSize === "number" && Number.isFinite(effectSize)
      ? effectSize.toFixed(2)
      : "N/A";

  return `Effect Size: ${formattedEffectSize}

Calculation: Normalized Effect Size = (Max Score - Min Score) / Category Standard Deviation

Category Standard Deviation: The standard deviation of all demographic scores within this specific category.

Thresholds:
• ≥ 0.8: "Large"
• ≥ 0.5 and < 0.8: "Medium"
• ≥ 0.2 and < 0.5: "Small"
• < 0.2: "Negligible"`;
};
|
| 258 |
+
|
| 259 |
+
// Main component
|
| 260 |
+
const DemographicAnalysis = ({
|
| 261 |
+
rawData = { demographicOptions: {}, mrpDemographics: {} }, // Expect camelCase keys here, snake_case inside mrpDemographics
|
| 262 |
+
modelsMeta = [], // Expect camelCase keys
|
| 263 |
+
metricsData = { highLevelCategories: {}, lowLevelMetrics: {} }, // Expect Title Case keys, contains internalMetricKey
|
| 264 |
+
equityAnalysis = { all_equity_gaps: [], universal_issues: [] }, // Expect snake_case keys
|
| 265 |
+
}) => {
|
| 266 |
+
// Use Title Case metric keys for state and dropdowns
|
| 267 |
+
const highLevelMetricDisplayKeys = Object.keys(
|
| 268 |
+
metricsData?.highLevelCategories || {}
|
| 269 |
+
).sort();
|
| 270 |
+
const lowLevelMetricDisplayKeys = Object.keys(
|
| 271 |
+
metricsData?.lowLevelMetrics || {}
|
| 272 |
+
).sort();
|
| 273 |
+
|
| 274 |
+
const [selectedDemographicFactor, setSelectedDemographicFactor] =
|
| 275 |
+
useState(null);
|
| 276 |
+
const [selectedMetricDisplayKey, setSelectedMetricDisplayKey] =
|
| 277 |
+
useState(null); // State holds Title Case
|
| 278 |
+
const [metricLevel, setMetricLevel] = useState("high");
|
| 279 |
+
|
| 280 |
+
const currentMetricDisplayKeys = useMemo(
|
| 281 |
+
() =>
|
| 282 |
+
metricLevel === "high"
|
| 283 |
+
? highLevelMetricDisplayKeys
|
| 284 |
+
: lowLevelMetricDisplayKeys,
|
| 285 |
+
[metricLevel, highLevelMetricDisplayKeys, lowLevelMetricDisplayKeys]
|
| 286 |
+
);
|
| 287 |
+
|
| 288 |
+
const getModelColor = (modelName) =>
|
| 289 |
+
modelsMeta.find((m) => m.model === modelName)?.color || "#999999";
|
| 290 |
+
|
| 291 |
+
// Set default factor
|
| 292 |
+
useEffect(() => {
|
| 293 |
+
const factors = Object.keys(rawData.demographicOptions || {});
|
| 294 |
+
if (!selectedDemographicFactor && factors.length > 0) {
|
| 295 |
+
const defaultFactor = factors.includes("Age") ? "Age" : factors.sort()[0];
|
| 296 |
+
setSelectedDemographicFactor(defaultFactor);
|
| 297 |
+
}
|
| 298 |
+
}, [rawData.demographicOptions, selectedDemographicFactor]);
|
| 299 |
+
|
| 300 |
+
// Set default metric when list available
|
| 301 |
+
useEffect(() => {
|
| 302 |
+
if (!selectedMetricDisplayKey && currentMetricDisplayKeys.length > 0) {
|
| 303 |
+
// Default logic might need adjustment if "Overall" isn't a key
|
| 304 |
+
const defaultMetric = currentMetricDisplayKeys.includes("Overall Score")
|
| 305 |
+
? "Overall Score"
|
| 306 |
+
: currentMetricDisplayKeys[0];
|
| 307 |
+
setSelectedMetricDisplayKey(defaultMetric);
|
| 308 |
+
} else if (
|
| 309 |
+
selectedMetricDisplayKey &&
|
| 310 |
+
!currentMetricDisplayKeys.includes(selectedMetricDisplayKey)
|
| 311 |
+
) {
|
| 312 |
+
setSelectedMetricDisplayKey(
|
| 313 |
+
currentMetricDisplayKeys.length > 0 ? currentMetricDisplayKeys[0] : null
|
| 314 |
+
);
|
| 315 |
+
}
|
| 316 |
+
}, [currentMetricDisplayKeys, selectedMetricDisplayKey, metricLevel]);
|
| 317 |
+
|
| 318 |
+
// Get the internal snake_case key for filtering equity gaps
|
| 319 |
+
const internalMetricKey = useMemo(() => {
|
| 320 |
+
if (!selectedMetricDisplayKey) return null;
|
| 321 |
+
const allMetrics = {
|
| 322 |
+
...(metricsData?.highLevelCategories || {}),
|
| 323 |
+
...(metricsData?.lowLevelMetrics || {}),
|
| 324 |
+
};
|
| 325 |
+
// Look up using Title Case display key
|
| 326 |
+
return allMetrics[selectedMetricDisplayKey]?.internalMetricKey ?? null;
|
| 327 |
+
}, [selectedMetricDisplayKey, metricsData]);
|
| 328 |
+
|
| 329 |
+
// Filter equity gaps based on internal key and factor
|
| 330 |
+
const filteredEquityGaps = useMemo(() => {
|
| 331 |
+
// Use internalMetricKey (snake_case) and selectedDemographicFactor
|
| 332 |
+
if (
|
| 333 |
+
!internalMetricKey ||
|
| 334 |
+
!selectedDemographicFactor ||
|
| 335 |
+
!equityAnalysis?.all_equity_gaps ||
|
| 336 |
+
!Array.isArray(equityAnalysis.all_equity_gaps)
|
| 337 |
+
) {
|
| 338 |
+
return [];
|
| 339 |
+
}
|
| 340 |
+
// Filter all_equity_gaps (which has snake_case keys)
|
| 341 |
+
return equityAnalysis.all_equity_gaps.filter(
|
| 342 |
+
(gap) =>
|
| 343 |
+
gap.category === internalMetricKey &&
|
| 344 |
+
gap.demographic_factor === selectedDemographicFactor
|
| 345 |
+
);
|
| 346 |
+
}, [
|
| 347 |
+
internalMetricKey,
|
| 348 |
+
selectedDemographicFactor,
|
| 349 |
+
equityAnalysis?.all_equity_gaps,
|
| 350 |
+
]);
|
| 351 |
+
|
| 352 |
+
// Prepare data for Equity Gap Chart - uses snake_case keys from filteredEquityGaps
|
| 353 |
+
const equityGapChartData = useMemo(() => {
|
| 354 |
+
return filteredEquityGaps
|
| 355 |
+
.map((gap) => ({
|
| 356 |
+
// Pass all original snake_case keys needed by tooltip/table
|
| 357 |
+
// These keys match the fields expected by EquityGapTooltip
|
| 358 |
+
model: gap.model,
|
| 359 |
+
gap: gap.score_range ?? 0, // Rename score_range to gap for chart dataKey
|
| 360 |
+
score_range: gap.score_range,
|
| 361 |
+
effect_size: gap.effect_size,
|
| 362 |
+
effect_size_class: gap.effect_size_class,
|
| 363 |
+
is_statistically_significant: gap.is_statistically_significant,
|
| 364 |
+
p_value: gap.p_value,
|
| 365 |
+
gap_confidence_interval_95_lower: gap.gap_confidence_interval_95_lower,
|
| 366 |
+
gap_confidence_interval_95_upper: gap.gap_confidence_interval_95_upper,
|
| 367 |
+
is_equity_concern: gap.is_equity_concern,
|
| 368 |
+
min_level: gap.min_level,
|
| 369 |
+
min_score: gap.min_score,
|
| 370 |
+
max_level: gap.max_level,
|
| 371 |
+
max_score: gap.max_score,
|
| 372 |
+
|
| 373 |
+
// Add derived properties
|
| 374 |
+
color: getModelColor(gap.model),
|
| 375 |
+
}))
|
| 376 |
+
.sort((a, b) => (a.gap ?? 0) - (b.gap ?? 0)) // Sort by gap size ascending
|
| 377 |
+
.map((item, index) => ({ ...item, rank: index + 1 })); // Add rank based on gap size
|
| 378 |
+
}, [filteredEquityGaps]); // Depend only on filteredEquityGaps
|
| 379 |
+
|
| 380 |
+
// Prepare data for Demographic Breakdown Chart
|
| 381 |
+
const demographicChartData = useMemo(() => {
|
| 382 |
+
// selectedMetricDisplayKey is Title Case, matching keys in mrpDemographics
|
| 383 |
+
if (
|
| 384 |
+
!selectedDemographicFactor ||
|
| 385 |
+
!selectedMetricDisplayKey ||
|
| 386 |
+
!rawData.mrpDemographics
|
| 387 |
+
)
|
| 388 |
+
return [];
|
| 389 |
+
const metricKeyInData = selectedMetricDisplayKey; // Use Title Case key
|
| 390 |
+
const levels = rawData.demographicOptions[selectedDemographicFactor] || [];
|
| 391 |
+
if (levels.length === 0) return [];
|
| 392 |
+
|
| 393 |
+
const chartData = levels.map((level) => {
|
| 394 |
+
const entry = { level };
|
| 395 |
+
modelsMeta.forEach((model) => {
|
| 396 |
+
// Access mrpDemographics using Title Case metric key
|
| 397 |
+
const score =
|
| 398 |
+
rawData.mrpDemographics[model.model]?.[selectedDemographicFactor]?.[
|
| 399 |
+
level
|
| 400 |
+
]?.[metricKeyInData];
|
| 401 |
+
entry[model.model] =
|
| 402 |
+
score !== undefined && score !== null && score !== "N/A"
|
| 403 |
+
? parseFloat(score)
|
| 404 |
+
: null;
|
| 405 |
+
entry[`${model.model}_color`] = model.color;
|
| 406 |
+
});
|
| 407 |
+
return entry;
|
| 408 |
+
});
|
| 409 |
+
return chartData.sort((a, b) => {
|
| 410 |
+
if (a.level === "N/A") return 1;
|
| 411 |
+
if (b.level === "N/A") return -1;
|
| 412 |
+
return a.level.localeCompare(b.level);
|
| 413 |
+
});
|
| 414 |
+
}, [
|
| 415 |
+
selectedDemographicFactor,
|
| 416 |
+
selectedMetricDisplayKey,
|
| 417 |
+
rawData.mrpDemographics,
|
| 418 |
+
rawData.demographicOptions,
|
| 419 |
+
modelsMeta,
|
| 420 |
+
]);
|
| 421 |
+
|
| 422 |
+
const modelsWithDemoData = useMemo(
|
| 423 |
+
() =>
|
| 424 |
+
modelsMeta
|
| 425 |
+
.map((m) => m.model)
|
| 426 |
+
.filter((modelName) =>
|
| 427 |
+
demographicChartData.some(
|
| 428 |
+
(d) => d[modelName] !== null && d[modelName] !== undefined
|
| 429 |
+
)
|
| 430 |
+
),
|
| 431 |
+
[modelsMeta, demographicChartData]
|
| 432 |
+
);
|
| 433 |
+
|
| 434 |
+
return (
|
| 435 |
+
<div>
|
| 436 |
+
{/* Controls Panel */}
|
| 437 |
+
<div className="border rounded-lg overflow-hidden mb-6 shadow-sm">
|
| 438 |
+
<div className="px-4 py-3 bg-gray-50 border-b">
|
| 439 |
+
<h3 className="font-semibold text-gray-800">
|
| 440 |
+
Demographic Analysis Controls
|
| 441 |
+
</h3>
|
| 442 |
+
</div>
|
| 443 |
+
<div className="p-4 grid grid-cols-1 md:grid-cols-3 gap-4">
|
| 444 |
+
{/* Factor Selector */}
|
| 445 |
+
<div>
|
| 446 |
+
<label
|
| 447 |
+
htmlFor="factorSelect"
|
| 448 |
+
className="block text-sm font-medium text-gray-700 mb-1"
|
| 449 |
+
>
|
| 450 |
+
Demographic Factor
|
| 451 |
+
</label>
|
| 452 |
+
<select
|
| 453 |
+
id="factorSelect"
|
| 454 |
+
className="w-full border rounded-md px-3 py-2 bg-white shadow-sm focus:outline-none focus:ring-2 focus:ring-blue-500"
|
| 455 |
+
value={selectedDemographicFactor || ""}
|
| 456 |
+
onChange={(e) => setSelectedDemographicFactor(e.target.value)}
|
| 457 |
+
>
|
| 458 |
+
<option value="" disabled>
|
| 459 |
+
Select factor
|
| 460 |
+
</option>
|
| 461 |
+
{Object.keys(rawData.demographicOptions || {})
|
| 462 |
+
.sort()
|
| 463 |
+
.map((factor) => (
|
| 464 |
+
<option key={factor} value={factor}>
|
| 465 |
+
{formatDisplayKey(factor)}
|
| 466 |
+
</option>
|
| 467 |
+
))}
|
| 468 |
+
</select>
|
| 469 |
+
</div>
|
| 470 |
+
{/* Level Toggle */}
|
| 471 |
+
<div>
|
| 472 |
+
<label className="block text-sm font-medium text-gray-700 mb-1">
|
| 473 |
+
Metric Level
|
| 474 |
+
</label>
|
| 475 |
+
<div className="flex">
|
| 476 |
+
<button
|
| 477 |
+
className={`px-3 py-2 text-sm font-medium border ${
|
| 478 |
+
metricLevel === "high"
|
| 479 |
+
? "bg-blue-100 text-blue-800 border-blue-300"
|
| 480 |
+
: "bg-white text-gray-700 border-gray-300 hover:bg-gray-50"
|
| 481 |
+
} rounded-l-md flex-1`}
|
| 482 |
+
onClick={() => setMetricLevel("high")}
|
| 483 |
+
>
|
| 484 |
+
High-Level
|
| 485 |
+
</button>
|
| 486 |
+
<button
|
| 487 |
+
className={`px-3 py-2 text-sm font-medium border-t border-b border-r ${
|
| 488 |
+
metricLevel === "low"
|
| 489 |
+
? "bg-blue-100 text-blue-800 border-blue-300"
|
| 490 |
+
: "bg-white text-gray-700 border-gray-300 hover:bg-gray-50"
|
| 491 |
+
} rounded-r-md flex-1`}
|
| 492 |
+
onClick={() => setMetricLevel("low")}
|
| 493 |
+
>
|
| 494 |
+
Low-Level
|
| 495 |
+
</button>
|
| 496 |
+
</div>
|
| 497 |
+
</div>
|
| 498 |
+
{/* Metric Selector - Uses Title Case keys */}
|
| 499 |
+
<div>
|
| 500 |
+
<label
|
| 501 |
+
htmlFor="metricSelect"
|
| 502 |
+
className="block text-sm font-medium text-gray-700 mb-1"
|
| 503 |
+
>
|
| 504 |
+
<Tooltip content={getMetricTooltip(selectedMetricDisplayKey)}>
|
| 505 |
+
<span>
|
| 506 |
+
{metricLevel === "high"
|
| 507 |
+
? "High-Level Category"
|
| 508 |
+
: "Low-Level Metric"}
|
| 509 |
+
</span>
|
| 510 |
+
</Tooltip>
|
| 511 |
+
</label>
|
| 512 |
+
<select
|
| 513 |
+
id="metricSelect"
|
| 514 |
+
className="w-full border rounded-md px-3 py-2 bg-white shadow-sm focus:outline-none focus:ring-2 focus:ring-blue-500"
|
| 515 |
+
value={selectedMetricDisplayKey || ""}
|
| 516 |
+
onChange={(e) => setSelectedMetricDisplayKey(e.target.value)}
|
| 517 |
+
disabled={currentMetricDisplayKeys.length === 0}
|
| 518 |
+
>
|
| 519 |
+
<option value="" disabled>
|
| 520 |
+
Select metric
|
| 521 |
+
</option>
|
| 522 |
+
{/* Iterate through Title Case keys */}
|
| 523 |
+
{currentMetricDisplayKeys.map((displayKey) => (
|
| 524 |
+
<option key={displayKey} value={displayKey}>
|
| 525 |
+
{displayKey}
|
| 526 |
+
</option>
|
| 527 |
+
))}
|
| 528 |
+
</select>
|
| 529 |
+
{!selectedMetricDisplayKey &&
|
| 530 |
+
currentMetricDisplayKeys.length > 0 && (
|
| 531 |
+
<p className="mt-1 text-xs text-gray-500">
|
| 532 |
+
Select a metric to view analysis.
|
| 533 |
+
</p>
|
| 534 |
+
)}
|
| 535 |
+
{currentMetricDisplayKeys.length === 0 && (
|
| 536 |
+
<p className="mt-1 text-xs text-amber-600">
|
| 537 |
+
No {metricLevel} metrics available.
|
| 538 |
+
</p>
|
| 539 |
+
)}
|
| 540 |
+
</div>
|
| 541 |
+
</div>
|
| 542 |
+
</div>
|
| 543 |
+
|
| 544 |
+
{/* Demographic Breakdown Chart */}
|
| 545 |
+
<div className="border rounded-lg overflow-hidden mb-6 shadow-sm">
|
| 546 |
+
<div className="px-4 py-3 bg-gray-50 border-b">
|
| 547 |
+
<h3 className="font-semibold text-gray-800">
|
| 548 |
+
{selectedMetricDisplayKey || "Metric"} Scores across{" "}
|
| 549 |
+
{formatDisplayKey(selectedDemographicFactor) || "Groups"}
|
| 550 |
+
<InfoTooltip
|
| 551 |
+
text={`Shows the average score (0-100) for each model within each subgroup of ${formatDisplayKey(
|
| 552 |
+
selectedDemographicFactor
|
| 553 |
+
)}. Higher scores are better.`}
|
| 554 |
+
/>
|
| 555 |
+
</h3>
|
| 556 |
+
</div>
|
| 557 |
+
<div className="p-4">
|
| 558 |
+
{demographicChartData.length > 0 && modelsWithDemoData.length > 0 ? (
|
| 559 |
+
<div className="h-80">
|
| 560 |
+
<ResponsiveContainer width="100%" height="100%">
|
| 561 |
+
<BarChart
|
| 562 |
+
data={demographicChartData}
|
| 563 |
+
margin={{ top: 5, right: 5, left: 0, bottom: 60 }}
|
| 564 |
+
>
|
| 565 |
+
<CartesianGrid strokeDasharray="3 3" vertical={false} />
|
| 566 |
+
<XAxis
|
| 567 |
+
dataKey="level"
|
| 568 |
+
angle={-45}
|
| 569 |
+
textAnchor="end"
|
| 570 |
+
tick={{ fontSize: 11 }}
|
| 571 |
+
interval={0}
|
| 572 |
+
height={70}
|
| 573 |
+
/>
|
| 574 |
+
<YAxis domain={[0, 100]} tick={{ fontSize: 11 }} width={40} />
|
| 575 |
+
<RechartsTooltip
|
| 576 |
+
content={<CustomDemographicTooltip />}
|
| 577 |
+
wrapperStyle={{ zIndex: 10 }}
|
| 578 |
+
/>
|
| 579 |
+
<Legend
|
| 580 |
+
layout="horizontal"
|
| 581 |
+
verticalAlign="bottom"
|
| 582 |
+
align="center"
|
| 583 |
+
wrapperStyle={{ paddingTop: 30 }}
|
| 584 |
+
iconSize={10}
|
| 585 |
+
/>
|
| 586 |
+
{modelsWithDemoData.map((modelName) => (
|
| 587 |
+
<Bar
|
| 588 |
+
key={modelName}
|
| 589 |
+
dataKey={modelName}
|
| 590 |
+
name={modelName}
|
| 591 |
+
fill={getModelColor(modelName)}
|
| 592 |
+
/>
|
| 593 |
+
))}
|
| 594 |
+
</BarChart>
|
| 595 |
+
</ResponsiveContainer>
|
| 596 |
+
</div>
|
| 597 |
+
) : (
|
| 598 |
+
<div className="flex items-center justify-center h-60 bg-gray-50 rounded">
|
| 599 |
+
<div className="text-center p-4">
|
| 600 |
+
<svg
|
| 601 |
+
xmlns="http://www.w3.org/2000/svg"
|
| 602 |
+
className="h-10 w-10 mx-auto text-gray-400 mb-3"
|
| 603 |
+
fill="none"
|
| 604 |
+
viewBox="0 0 24 24"
|
| 605 |
+
stroke="currentColor"
|
| 606 |
+
>
|
| 607 |
+
<path
|
| 608 |
+
strokeLinecap="round"
|
| 609 |
+
strokeLinejoin="round"
|
| 610 |
+
strokeWidth={2}
|
| 611 |
+
d="M9 17v-2m3 2v-4m3 4v-6m2 10H7a2 2 0 01-2-2V7a2 2 0 012-2h2l2-3h6l2 3h2a2 2 0 012 2v10a2 2 0 01-2 2h-1"
|
| 612 |
+
/>
|
| 613 |
+
</svg>
|
| 614 |
+
<h3 className="text-lg font-medium text-gray-900 mb-1">
|
| 615 |
+
No Data Available
|
| 616 |
+
</h3>
|
| 617 |
+
<p className="text-sm text-gray-600">
|
| 618 |
+
{!selectedDemographicFactor
|
| 619 |
+
? "Please select a demographic factor."
|
| 620 |
+
: !selectedMetricDisplayKey
|
| 621 |
+
? "Please select a metric."
|
| 622 |
+
: "No score data found."}
|
| 623 |
+
</p>
|
| 624 |
+
</div>
|
| 625 |
+
</div>
|
| 626 |
+
)}
|
| 627 |
+
</div>
|
| 628 |
+
</div>
|
| 629 |
+
|
| 630 |
+
{/* Equity Gap Comparison Chart */}
|
| 631 |
+
<div className="border rounded-lg overflow-hidden mb-6 shadow-sm">
|
| 632 |
+
<div className="px-4 py-3 bg-gray-50 border-b">
|
| 633 |
+
<h3 className="font-semibold text-gray-800">
|
| 634 |
+
Equity Gap Comparison for {selectedMetricDisplayKey || "Metric"}
|
| 635 |
+
<InfoTooltip
|
| 636 |
+
text={`Compares the maximum score difference observed between ${formatDisplayKey(
|
| 637 |
+
selectedDemographicFactor
|
| 638 |
+
)} groups for each model. Lower gaps indicate better equity.`}
|
| 639 |
+
/>
|
| 640 |
+
</h3>
|
| 641 |
+
</div>
|
| 642 |
+
<div className="p-4">
|
| 643 |
+
{equityGapChartData.length > 0 ? (
|
| 644 |
+
<div className="h-72">
|
| 645 |
+
<ResponsiveContainer width="100%" height="100%">
|
| 646 |
+
<BarChart
|
| 647 |
+
data={equityGapChartData}
|
| 648 |
+
margin={{ top: 5, right: 30, left: 5, bottom: 5 }}
|
| 649 |
+
layout="vertical"
|
| 650 |
+
>
|
| 651 |
+
<CartesianGrid
|
| 652 |
+
strokeDasharray="3 3"
|
| 653 |
+
horizontal={true}
|
| 654 |
+
vertical={false}
|
| 655 |
+
/>
|
| 656 |
+
<XAxis
|
| 657 |
+
type="number"
|
| 658 |
+
dataKey="gap"
|
| 659 |
+
domain={[0, "auto"]}
|
| 660 |
+
tick={{ fontSize: 11 }}
|
| 661 |
+
allowDecimals={false}
|
| 662 |
+
/>
|
| 663 |
+
<YAxis
|
| 664 |
+
dataKey="model"
|
| 665 |
+
type="category"
|
| 666 |
+
width={130}
|
| 667 |
+
tick={{ fontSize: 11 }}
|
| 668 |
+
/>
|
| 669 |
+
<RechartsTooltip
|
| 670 |
+
content={<EquityGapTooltip />}
|
| 671 |
+
wrapperStyle={{ zIndex: 10 }}
|
| 672 |
+
/>
|
| 673 |
+
<Bar
|
| 674 |
+
dataKey="gap"
|
| 675 |
+
name="Equity Gap"
|
| 676 |
+
barSize={20}
|
| 677 |
+
radius={[0, 4, 4, 0]}
|
| 678 |
+
>
|
| 679 |
+
{equityGapChartData.map((entry, index) => (
|
| 680 |
+
<Cell
|
| 681 |
+
key={`cell-${index}`}
|
| 682 |
+
fill={entry.color}
|
| 683 |
+
fillOpacity={0.8}
|
| 684 |
+
/>
|
| 685 |
+
))}
|
| 686 |
+
<LabelList
|
| 687 |
+
dataKey="gap"
|
| 688 |
+
position="right"
|
| 689 |
+
formatter={(value) => value?.toFixed(1) ?? ""}
|
| 690 |
+
style={{ fontSize: 11, fill: "#6b7280" }}
|
| 691 |
+
/>
|
| 692 |
+
</Bar>
|
| 693 |
+
</BarChart>
|
| 694 |
+
</ResponsiveContainer>
|
| 695 |
+
</div>
|
| 696 |
+
) : (
|
| 697 |
+
<div className="flex items-center justify-center h-60 bg-gray-50 rounded">
|
| 698 |
+
<div className="text-center p-4">
|
| 699 |
+
<svg
|
| 700 |
+
xmlns="http://www.w3.org/2000/svg"
|
| 701 |
+
className="h-10 w-10 mx-auto text-gray-400 mb-3"
|
| 702 |
+
fill="none"
|
| 703 |
+
viewBox="0 0 24 24"
|
| 704 |
+
stroke="currentColor"
|
| 705 |
+
>
|
| 706 |
+
<path
|
| 707 |
+
strokeLinecap="round"
|
| 708 |
+
strokeLinejoin="round"
|
| 709 |
+
strokeWidth={2}
|
| 710 |
+
d="M9 17v-2m3 2v-4m3 4v-6m2 10H7a2 2 0 01-2-2V7a2 2 0 012-2h2l2-3h6l2 3h2a2 2 0 012 2v10a2 2 0 01-2 2h-1"
|
| 711 |
+
/>
|
| 712 |
+
</svg>
|
| 713 |
+
<h3 className="text-lg font-medium text-gray-900 mb-1">
|
| 714 |
+
No Equity Gap Data
|
| 715 |
+
</h3>
|
| 716 |
+
<p className="text-sm text-gray-600">
|
| 717 |
+
{!selectedDemographicFactor
|
| 718 |
+
? "Select factor."
|
| 719 |
+
: !selectedMetricDisplayKey
|
| 720 |
+
? "Select metric."
|
| 721 |
+
: "No equity gaps found."}
|
| 722 |
+
</p>
|
| 723 |
+
</div>
|
| 724 |
+
</div>
|
| 725 |
+
)}
|
| 726 |
+
{equityGapChartData.length > 0 && (
|
| 727 |
+
<p className="mt-3 text-xs text-gray-500">
|
| 728 |
+
Chart ranks models by equity gap size (lower is better).
|
| 729 |
+
</p>
|
| 730 |
+
)}
|
| 731 |
+
</div>
|
| 732 |
+
</div>
|
| 733 |
+
|
| 734 |
+
{/* Equity Gap Details Table - IMPROVED */}
|
| 735 |
+
{equityGapChartData.length > 0 && (
|
| 736 |
+
<div className="border rounded-lg overflow-hidden mb-6 shadow-sm">
|
| 737 |
+
<div className="px-4 py-3 bg-gray-50 border-b">
|
| 738 |
+
<h3 className="font-semibold text-gray-800">
|
| 739 |
+
Detailed Equity Gaps: {selectedMetricDisplayKey || "Metric"} by{" "}
|
| 740 |
+
{formatDisplayKey(selectedDemographicFactor) || "Factor"}
|
| 741 |
+
</h3>
|
| 742 |
+
</div>
|
| 743 |
+
<div className="p-4 overflow-x-auto">
|
| 744 |
+
<table className="min-w-full divide-y divide-gray-200">
|
| 745 |
+
<thead className="bg-gray-50">
|
| 746 |
+
<tr>
|
| 747 |
+
<th className="px-3 py-2 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">
|
| 748 |
+
Rank
|
| 749 |
+
</th>
|
| 750 |
+
<th className="px-3 py-2 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">
|
| 751 |
+
Model
|
| 752 |
+
</th>
|
| 753 |
+
<th className="px-3 py-2 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">
|
| 754 |
+
Equity Gap
|
| 755 |
+
</th>
|
| 756 |
+
<th className="px-3 py-2 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">
|
| 757 |
+
Effect Size
|
| 758 |
+
</th>
|
| 759 |
+
<th className="px-3 py-2 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">
|
| 760 |
+
Significance
|
| 761 |
+
</th>
|
| 762 |
+
<th className="px-3 py-2 text-center text-xs font-medium text-gray-500 uppercase tracking-wider">
|
| 763 |
+
Concern?
|
| 764 |
+
</th>
|
| 765 |
+
<th className="px-3 py-2 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">
|
| 766 |
+
Lowest Group (Score)
|
| 767 |
+
</th>
|
| 768 |
+
<th className="px-3 py-2 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">
|
| 769 |
+
Highest Group (Score)
|
| 770 |
+
</th>
|
| 771 |
+
</tr>
|
| 772 |
+
</thead>
|
| 773 |
+
<tbody className="bg-white divide-y divide-gray-200">
|
| 774 |
+
{equityGapChartData.map((gap) => {
|
| 775 |
+
const minScoreDisplay =
|
| 776 |
+
typeof gap.min_score === "number"
|
| 777 |
+
? gap.min_score.toFixed(1)
|
| 778 |
+
: "-";
|
| 779 |
+
const maxScoreDisplay =
|
| 780 |
+
typeof gap.max_score === "number"
|
| 781 |
+
? gap.max_score.toFixed(1)
|
| 782 |
+
: "-";
|
| 783 |
+
|
| 784 |
+
return (
|
| 785 |
+
<tr
|
| 786 |
+
key={gap.model}
|
| 787 |
+
className={`hover:bg-gray-50 ${
|
| 788 |
+
gap.is_equity_concern ? "bg-red-50" : ""
|
| 789 |
+
}`}
|
| 790 |
+
>
|
| 791 |
+
<td className="px-3 py-2 whitespace-nowrap text-sm text-gray-500">
|
| 792 |
+
{gap.rank}
|
| 793 |
+
</td>
|
| 794 |
+
<td className="px-3 py-2 whitespace-nowrap">
|
| 795 |
+
<div className="flex items-center">
|
| 796 |
+
<div
|
| 797 |
+
className="w-3 h-3 rounded-full mr-2 flex-shrink-0"
|
| 798 |
+
style={{ backgroundColor: gap.color }}
|
| 799 |
+
></div>
|
| 800 |
+
<span className="text-sm font-medium text-gray-900">
|
| 801 |
+
{gap.model}
|
| 802 |
+
</span>
|
| 803 |
+
</div>
|
| 804 |
+
</td>
|
| 805 |
+
<td className="px-3 py-2 whitespace-nowrap text-sm font-medium">
|
| 806 |
+
{/* Equity Gap as plain text */}
|
| 807 |
+
{gap.gap !== undefined && gap.gap !== null
|
| 808 |
+
? gap.gap.toFixed(1)
|
| 809 |
+
: "N/A"}
|
| 810 |
+
</td>
|
| 811 |
+
<td className="px-3 py-2 whitespace-nowrap text-sm">
|
| 812 |
+
{gap.effect_size !== undefined &&
|
| 813 |
+
gap.effect_size !== null ? (
|
| 814 |
+
<div className="flex items-center">
|
| 815 |
+
<span
|
| 816 |
+
className={`px-2 py-0.5 rounded-full text-xs font-medium ${getEffectSizeBadgeStyle(
|
| 817 |
+
gap.effect_size_class
|
| 818 |
+
)}`}
|
| 819 |
+
>
|
| 820 |
+
{gap.effect_size_class || "N/A"}
|
| 821 |
+
</span>
|
| 822 |
+
<InfoTooltip
|
| 823 |
+
text={getEffectSizeTooltip(gap.effect_size)}
|
| 824 |
+
/>
|
| 825 |
+
</div>
|
| 826 |
+
) : (
|
| 827 |
+
<span className="text-gray-500">N/A</span>
|
| 828 |
+
)}
|
| 829 |
+
</td>
|
| 830 |
+
<td className="px-3 py-2 whitespace-nowrap text-sm">
|
| 831 |
+
<div className="flex flex-col">
|
| 832 |
+
<div className="flex items-center">
|
| 833 |
+
<span
|
| 834 |
+
className={`px-2 py-0.5 rounded-full text-xs font-medium ${getSignificanceBadgeStyle(
|
| 835 |
+
gap.is_statistically_significant
|
| 836 |
+
)}`}
|
| 837 |
+
>
|
| 838 |
+
{gap.is_statistically_significant ? (
|
| 839 |
+
<span>Significant ✔</span>
|
| 840 |
+
) : (
|
| 841 |
+
<span>Not Significant ✘</span>
|
| 842 |
+
)}
|
| 843 |
+
</span>
|
| 844 |
+
</div>
|
| 845 |
+
<div className="text-xs text-gray-500 mt-1">
|
| 846 |
+
{gap.p_value !== undefined && gap.p_value !== null
|
| 847 |
+
? formatPValue(gap.p_value)
|
| 848 |
+
: ""}
|
| 849 |
+
</div>
|
| 850 |
+
</div>
|
| 851 |
+
</td>
|
| 852 |
+
<td className="px-3 py-2 whitespace-nowrap text-sm text-center">
|
| 853 |
+
<span
|
| 854 |
+
className={`inline-block px-2 py-0.5 rounded-full text-xs font-medium ${getConcernBadgeStyle(
|
| 855 |
+
gap.is_equity_concern
|
| 856 |
+
)}`}
|
| 857 |
+
>
|
| 858 |
+
{gap.is_equity_concern ? "Yes" : "No"}
|
| 859 |
+
</span>
|
| 860 |
+
</td>
|
| 861 |
+
<td className="px-3 py-2 whitespace-nowrap text-sm">
|
| 862 |
+
{gap.min_level ? (
|
| 863 |
+
<div className="flex flex-col">
|
| 864 |
+
<span className="font-medium">{gap.min_level}</span>
|
| 865 |
+
<span className="text-gray-500">
|
| 866 |
+
{minScoreDisplay}
|
| 867 |
+
</span>
|
| 868 |
+
</div>
|
| 869 |
+
) : (
|
| 870 |
+
<span className="text-gray-500">-</span>
|
| 871 |
+
)}
|
| 872 |
+
</td>
|
| 873 |
+
<td className="px-3 py-2 whitespace-nowrap text-sm">
|
| 874 |
+
{gap.max_level ? (
|
| 875 |
+
<div className="flex flex-col">
|
| 876 |
+
<span className="font-medium">{gap.max_level}</span>
|
| 877 |
+
<span className="text-gray-500">
|
| 878 |
+
{maxScoreDisplay}
|
| 879 |
+
</span>
|
| 880 |
+
</div>
|
| 881 |
+
) : (
|
| 882 |
+
<span className="text-gray-500">-</span>
|
| 883 |
+
)}
|
| 884 |
+
</td>
|
| 885 |
+
</tr>
|
| 886 |
+
);
|
| 887 |
+
})}
|
| 888 |
+
</tbody>
|
| 889 |
+
</table>
|
| 890 |
+
</div>
|
| 891 |
+
{/* Table Footer/Explanation - IMPROVED */}
|
| 892 |
+
<div className="px-4 pb-4 pt-2 text-xs text-gray-600">
|
| 893 |
+
<div className="space-y-1">
|
| 894 |
+
<p>
|
| 895 |
+
<span className="font-semibold">Rank:</span> Based on lowest
|
| 896 |
+
Equity Gap value for this metric/factor
|
| 897 |
+
</p>
|
| 898 |
+
<p>
|
| 899 |
+
<span className="font-semibold">Equity Gap:</span> Score
|
| 900 |
+
difference (0-100 points) between highest and lowest scoring
|
| 901 |
+
groups
|
| 902 |
+
</p>
|
| 903 |
+
<p>
|
| 904 |
+
<span className="font-semibold">Effect Size:</span> Gap
|
| 905 |
+
magnitude relative to score variation (hover for details)
|
| 906 |
+
</p>
|
| 907 |
+
<p>
|
| 908 |
+
<span className="font-semibold">Significance:</span>Whether the
|
| 909 |
+
gap is statistically significant after adjusting for multiple
|
| 910 |
+
tests (Benjamini-Hochberg FDR correction, q<0.05)
|
| 911 |
+
</p>
|
| 912 |
+
<p>
|
| 913 |
+
<span className="font-semibold">Concern?:</span> 'Yes' flags
|
| 914 |
+
potential equity concerns (Large Effect Size AND Statistically
|
| 915 |
+
Significant)
|
| 916 |
+
</p>
|
| 917 |
+
</div>
|
| 918 |
+
</div>
|
| 919 |
+
</div>
|
| 920 |
+
)}
|
| 921 |
+
</div>
|
| 922 |
+
);
|
| 923 |
+
};
|
| 924 |
+
|
| 925 |
+
export default DemographicAnalysis;
|
leaderboard-app/components/LLMComparisonDashboard.jsx
ADDED
|
@@ -0,0 +1,639 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// components/LLMComparisonDashboard.jsx
|
| 2 |
+
|
| 3 |
+
"use client";
|
| 4 |
+
|
| 5 |
+
import React, { useState, useMemo } from "react";
|
| 6 |
+
import {
|
| 7 |
+
getScoreBadgeColor,
|
| 8 |
+
formatDisplayKey, // Use this for displaying snake_case keys nicely
|
| 9 |
+
getMetricTooltip,
|
| 10 |
+
getEquityIndicatorStyle, // Use this for Max Equity Gap status
|
| 11 |
+
} from "../lib/utils"; // Adjust path as needed
|
| 12 |
+
import TaskPerformance from "./TaskPerformance";
|
| 13 |
+
import DemographicAnalysis from "./DemographicAnalysis";
|
| 14 |
+
import MetricsBreakdown from "./MetricsBreakdown";
|
| 15 |
+
import About from "./About";
|
| 16 |
+
import { Tooltip } from "./Tooltip"; // Assuming this is your Tooltip component
|
| 17 |
+
|
| 18 |
+
// Helper component: a small info-icon button that shows a tooltip popover on hover or click
|
| 19 |
+
const InfoTooltip = ({ text }) => {
|
| 20 |
+
const [isVisible, setIsVisible] = useState(false);
|
| 21 |
+
return (
|
| 22 |
+
<div className="relative inline-block ml-1 align-middle">
|
| 23 |
+
<button
|
| 24 |
+
className="text-gray-400 hover:text-gray-600 focus:outline-none"
|
| 25 |
+
onMouseEnter={() => setIsVisible(true)}
|
| 26 |
+
onMouseLeave={() => setIsVisible(false)}
|
| 27 |
+
onClick={(e) => {
|
| 28 |
+
e.stopPropagation();
|
| 29 |
+
setIsVisible(!isVisible);
|
| 30 |
+
}}
|
| 31 |
+
aria-label="Info"
|
| 32 |
+
>
|
| 33 |
+
<svg
|
| 34 |
+
xmlns="http://www.w3.org/2000/svg"
|
| 35 |
+
className="h-4 w-4"
|
| 36 |
+
viewBox="0 0 20 20"
|
| 37 |
+
fill="currentColor"
|
| 38 |
+
>
|
| 39 |
+
<path
|
| 40 |
+
fillRule="evenodd"
|
| 41 |
+
d="M18 10a8 8 0 11-16 0 8 8 0 0116 0zm-7-4a1 1 0 11-2 0 1 1 0 012 0zM9 9a1 1 0 000 2v3a1 1 0 001 1h1a1 1 0 100-2v-3a1 1 0 00-1-1H9z"
|
| 42 |
+
clipRule="evenodd"
|
| 43 |
+
/>
|
| 44 |
+
</svg>
|
| 45 |
+
</button>
|
| 46 |
+
{isVisible && (
|
| 47 |
+
<div className="absolute z-10 w-64 p-2 bg-white border rounded shadow-lg text-xs text-gray-700 -translate-x-1/2 left-1/2 mt-1 normal-case">
|
| 48 |
+
{text}
|
| 49 |
+
</div>
|
| 50 |
+
)}
|
| 51 |
+
</div>
|
| 52 |
+
);
|
| 53 |
+
};
|
| 54 |
+
|
| 55 |
+
// Main dashboard component
|
| 56 |
+
const LLMComparisonDashboard = ({ data: processedData }) => {
|
| 57 |
+
const [activeTab, setActiveTab] = useState("overview");
|
| 58 |
+
const [topPerformersView, setTopPerformersView] = useState("high-level");
|
| 59 |
+
|
| 60 |
+
// Destructure data - top-level keys are camelCase
|
| 61 |
+
// Nested rawData and equityAnalysis retain original snake_case keys
|
| 62 |
+
const {
|
| 63 |
+
models: rankedModels = [], // This is overallRankingProcessed with camelCase keys
|
| 64 |
+
metricsData = { highLevelCategories: {}, lowLevelMetrics: {} }, // Title Case keys inside
|
| 65 |
+
radarData = [],
|
| 66 |
+
overviewCardData = {}, // camelCase keys inside expected
|
| 67 |
+
rawData = {
|
| 68 |
+
// camelCase keys for objects, snake_case keys inside those objects
|
| 69 |
+
taskLevelPerformance: {},
|
| 70 |
+
mrpDemographics: {},
|
| 71 |
+
demographicOptions: {},
|
| 72 |
+
availableMetrics: [], // Title Case
|
| 73 |
+
tasks: [],
|
| 74 |
+
taskCategories: {},
|
| 75 |
+
taskMetrics: [], // Title Case
|
| 76 |
+
taskMetricsSnake: [], // snake_case
|
| 77 |
+
taskCategoryMap: {},
|
| 78 |
+
},
|
| 79 |
+
bestPerCategory = {}, // Title Case keys
|
| 80 |
+
bestPerMetric = {}, // Title Case keys
|
| 81 |
+
equityAnalysis = {
|
| 82 |
+
// Original snake_case keys
|
| 83 |
+
all_equity_gaps: [],
|
| 84 |
+
model_max_effect_gaps: {},
|
| 85 |
+
universal_issues: [],
|
| 86 |
+
assessment_method: {},
|
| 87 |
+
demographic_variation_stats: {},
|
| 88 |
+
},
|
| 89 |
+
metadata = {}, // Original keys
|
| 90 |
+
} = processedData || {};
|
| 91 |
+
|
| 92 |
+
// NEW: Helper function to get color for Max Equity Gap bubble
|
| 93 |
+
/**
 * Chooses the Tailwind badge classes for a model's Max Equity Gap bubble.
 *
 * Decision order:
 *   - large effect AND significant AND flagged as concern -> red
 *   - large effect (otherwise)                            -> yellow
 *   - significant only                                    -> blue
 *   - anything else                                       -> gray
 *
 * @param {object} model - Ranked-model entry; reads `maxEffectConcernFlag`,
 *   `maxEffectSignificant` and `maxEffectSizeClass`.
 * @returns {string} Background + text colour class names.
 */
const getEquityGapBadgeColor = (model) => {
  const {
    maxEffectConcernFlag: flaggedAsConcern,
    maxEffectSignificant: significant,
    maxEffectSizeClass: sizeClass,
  } = model;
  const largeEffect = sizeClass === "Large";

  if (largeEffect) {
    // A large effect is only an "equity concern" when it is also
    // significant and explicitly flagged; otherwise it is just "large".
    return flaggedAsConcern && significant
      ? "bg-red-100 text-red-800"
      : "bg-yellow-100 text-yellow-800";
  }

  return significant
    ? "bg-blue-100 text-blue-800"
    : "bg-gray-100 text-gray-800";
};
|
| 110 |
+
|
| 111 |
+
// UPDATED: Render cell for Max Equity Gap column with bubble design
|
| 112 |
+
const renderMaxEquityGapCell = (model) => {
|
| 113 |
+
// model object has camelCase keys
|
| 114 |
+
const gapValue = model.maxEffectGap;
|
| 115 |
+
const isConcern = model.maxEffectConcernFlag;
|
| 116 |
+
const significanceStatus = model.maxEffectSignificant;
|
| 117 |
+
const pValue = model.maxEffectPValue;
|
| 118 |
+
const effectSizeClass = model.maxEffectSizeClass;
|
| 119 |
+
const isLargeEffect = effectSizeClass === "Large";
|
| 120 |
+
// Access nested details using original snake_case keys
|
| 121 |
+
const gapDetails = model.maxEffectGapDetails || {};
|
| 122 |
+
const ciLower = gapDetails.gap_confidence_interval_95_lower;
|
| 123 |
+
const ciUpper = gapDetails.gap_confidence_interval_95_upper;
|
| 124 |
+
|
| 125 |
+
const displayValue =
|
| 126 |
+
typeof gapValue === "number" ? gapValue.toFixed(1) : "N/A";
|
| 127 |
+
if (displayValue === "N/A")
|
| 128 |
+
return <span className="text-xs text-gray-500">N/A</span>;
|
| 129 |
+
|
| 130 |
+
const indicator = getEquityIndicatorStyle(
|
| 131 |
+
isConcern,
|
| 132 |
+
isLargeEffect,
|
| 133 |
+
significanceStatus,
|
| 134 |
+
pValue,
|
| 135 |
+
effectSizeClass
|
| 136 |
+
);
|
| 137 |
+
let fullTooltipContent = indicator.tooltip;
|
| 138 |
+
if (typeof ciLower === "number" && typeof ciUpper === "number") {
|
| 139 |
+
fullTooltipContent += `\n95% CI: [${ciLower.toFixed(
|
| 140 |
+
1
|
| 141 |
+
)}, ${ciUpper.toFixed(1)}]`;
|
| 142 |
+
} else {
|
| 143 |
+
fullTooltipContent += `\n95% CI: N/A`;
|
| 144 |
+
}
|
| 145 |
+
|
| 146 |
+
return (
|
| 147 |
+
<Tooltip
|
| 148 |
+
content={
|
| 149 |
+
<div className="whitespace-pre-line">{fullTooltipContent}</div>
|
| 150 |
+
}
|
| 151 |
+
>
|
| 152 |
+
<span
|
| 153 |
+
className={`px-2 py-0.5 rounded-full text-xs font-medium ${getEquityGapBadgeColor(
|
| 154 |
+
model
|
| 155 |
+
)}`}
|
| 156 |
+
>
|
| 157 |
+
{displayValue}
|
| 158 |
+
</span>
|
| 159 |
+
</Tooltip>
|
| 160 |
+
);
|
| 161 |
+
};
|
| 162 |
+
|
| 163 |
+
// NEW: Helper for equity concerns percentage badge color
|
| 164 |
+
/**
 * Maps an equity-concern percentage to Tailwind badge classes.
 *
 *   null / undefined -> gray
 *   exactly 0        -> green
 *   up to 2.5        -> blue
 *   up to 5          -> yellow
 *   otherwise        -> red
 *
 * @param {number|null|undefined} percentage - Share of demographic gaps
 *   flagged as concerns.
 * @returns {string} Background + text colour class names.
 */
const getEquityConcernBadgeColor = (percentage) => {
  // `== null` intentionally matches both null and undefined.
  if (percentage == null) {
    return "bg-gray-100 text-gray-800";
  }

  switch (true) {
    case percentage === 0:
      return "bg-green-100 text-green-800";
    case percentage <= 2.5:
      return "bg-blue-100 text-blue-800";
    case percentage <= 5:
      return "bg-yellow-100 text-yellow-800";
    default:
      return "bg-red-100 text-red-800";
  }
};
|
| 172 |
+
|
| 173 |
+
return (
|
| 174 |
+
<div className="max-w-7xl mx-auto p-4 bg-white">
|
| 175 |
+
{/* Header */}
|
| 176 |
+
<div className="relative mb-6 overflow-hidden">
|
| 177 |
+
<div className="absolute inset-0 bg-gradient-to-br from-blue-50 to-sky-50 opacity-70"></div>
|
| 178 |
+
<div className="relative max-w-5xl mx-auto px-6 py-6">
|
| 179 |
+
<div className="text-center">
|
| 180 |
+
<h1 className="text-4xl font-bold mb-2 tracking-narrow text-blue-700">
|
| 181 |
+
Prolific's AI User Experience Leaderboard
|
| 182 |
+
</h1>
|
| 183 |
+
|
| 184 |
+
<p className="text-gray-600 max-w-4xl mx-auto">
|
| 185 |
+
A benchmark assessing how well language models handle real-world
|
| 186 |
+
tasks based on user experiences.
|
| 187 |
+
</p>
|
| 188 |
+
</div>
|
| 189 |
+
</div>
|
| 190 |
+
</div>
|
| 191 |
+
{/* Tab Buttons */}
|
| 192 |
+
<div className="flex flex-wrap mb-6 border-b">
|
| 193 |
+
{[
|
| 194 |
+
"overview",
|
| 195 |
+
"metrics-breakdown",
|
| 196 |
+
"task-performance",
|
| 197 |
+
"demographic-analysis",
|
| 198 |
+
"about",
|
| 199 |
+
].map((tab) => (
|
| 200 |
+
<button
|
| 201 |
+
key={tab}
|
| 202 |
+
className={`px-4 py-2 font-medium capitalize ${
|
| 203 |
+
activeTab === tab
|
| 204 |
+
? "text-blue-600 border-b-2 border-blue-600"
|
| 205 |
+
: "text-gray-500 hover:text-gray-700"
|
| 206 |
+
}`}
|
| 207 |
+
onClick={() => setActiveTab(tab)}
|
| 208 |
+
>
|
| 209 |
+
{" "}
|
| 210 |
+
{tab.replace("-", " ")}{" "}
|
| 211 |
+
</button>
|
| 212 |
+
))}
|
| 213 |
+
</div>
|
| 214 |
+
{/* Overview Tab */}
|
| 215 |
+
{activeTab === "overview" && (
|
| 216 |
+
<div>
|
| 217 |
+
{/* Overall Rankings Card */}
|
| 218 |
+
<div className="mb-6 border rounded-lg overflow-hidden shadow-sm">
|
| 219 |
+
<div className="px-4 py-3 bg-gray-50 border-b">
|
| 220 |
+
<h2 className="text-xl font-semibold text-gray-800">
|
| 221 |
+
Overall Model Rankings
|
| 222 |
+
</h2>
|
| 223 |
+
</div>
|
| 224 |
+
<div className="p-4">
|
| 225 |
+
<div className="overflow-x-auto">
|
| 226 |
+
<table className="w-full min-w-[850px] table-auto divide-y divide-gray-200">
|
| 227 |
+
<thead>
|
| 228 |
+
<tr className="bg-gray-50">
|
| 229 |
+
<th className="px-3 py-2 text-left text-xs font-medium text-gray-500 uppercase tracking-wider w-12">
|
| 230 |
+
Rank
|
| 231 |
+
</th>
|
| 232 |
+
<th className="px-3 py-2 text-left text-xs font-medium text-gray-500 uppercase tracking-wider w-48">
|
| 233 |
+
Model
|
| 234 |
+
</th>
|
| 235 |
+
<th className="px-3 py-2 text-left text-xs font-medium text-gray-500 uppercase tracking-wider w-28">
|
| 236 |
+
<span>Overall Score</span>
|
| 237 |
+
</th>
|
| 238 |
+
<th className="px-3 py-2 text-left text-xs font-medium text-gray-500 uppercase tracking-wider w-24">
|
| 239 |
+
<span>Overall SD</span>
|
| 240 |
+
</th>
|
| 241 |
+
<th className="px-3 py-2 text-left text-xs font-medium text-gray-500 uppercase tracking-wider w-32">
|
| 242 |
+
<span>Max Equity Gap</span>
|
| 243 |
+
</th>
|
| 244 |
+
<th className="px-3 py-2 text-left text-xs font-medium text-gray-500 uppercase tracking-wider w-38">
|
| 245 |
+
<span>Max Gap Area</span>
|
| 246 |
+
</th>
|
| 247 |
+
<th className="px-3 py-2 text-center text-xs font-medium text-gray-500 uppercase tracking-wider w-36">
|
| 248 |
+
<span>Equity Concerns</span>
|
| 249 |
+
</th>
|
| 250 |
+
<th className="px-3 py-2 text-left text-xs font-medium text-gray-500 uppercase tracking-wider w-32">
|
| 251 |
+
<span>User Retention</span>
|
| 252 |
+
</th>
|
| 253 |
+
</tr>
|
| 254 |
+
</thead>
|
| 255 |
+
<tbody className="divide-y divide-gray-200">
|
| 256 |
+
{/* Use camelCase model object from rankedModels */}
|
| 257 |
+
{rankedModels.map((model) => (
|
| 258 |
+
<tr key={model.model} className="hover:bg-gray-50">
|
| 259 |
+
<td className="px-3 py-3 text-sm font-medium text-gray-900">
|
| 260 |
+
{model.rank}
|
| 261 |
+
</td>
|
| 262 |
+
<td className="px-3 py-3">
|
| 263 |
+
<div className="flex items-center">
|
| 264 |
+
<div
|
| 265 |
+
className="w-3 h-3 rounded-full mr-2 flex-shrink-0"
|
| 266 |
+
style={{ backgroundColor: model.color }}
|
| 267 |
+
></div>
|
| 268 |
+
<span className="text-sm font-medium text-gray-900">
|
| 269 |
+
{model.model}
|
| 270 |
+
</span>
|
| 271 |
+
</div>
|
| 272 |
+
</td>
|
| 273 |
+
<td className="px-3 py-3 text-sm font-semibold text-gray-800">
|
| 274 |
+
{model.overallScore !== null
|
| 275 |
+
? model.overallScore.toFixed(1)
|
| 276 |
+
: "N/A"}
|
| 277 |
+
</td>
|
| 278 |
+
<td className="px-3 py-3 text-sm text-gray-600">
|
| 279 |
+
{model.stdDevAcrossCats !== "N/A" &&
|
| 280 |
+
model.stdDevAcrossCats !== null
|
| 281 |
+
? `± ${Number(model.stdDevAcrossCats).toFixed(1)}`
|
| 282 |
+
: "N/A"}
|
| 283 |
+
</td>
|
| 284 |
+
<td className="px-3 py-3 text-sm">
|
| 285 |
+
{renderMaxEquityGapCell(model)}
|
| 286 |
+
</td>
|
| 287 |
+
<td className="px-3 py-3">
|
| 288 |
+
{model.maxEffectFactor &&
|
| 289 |
+
model.maxEffectFactor !== "N/A" ? (
|
| 290 |
+
<div className="flex flex-col">
|
| 291 |
+
<span className="text-xs font-medium text-gray-900">
|
| 292 |
+
{formatDisplayKey(model.maxEffectFactor)}
|
| 293 |
+
</span>
|
| 294 |
+
<span className="text-xs text-gray-500">
|
| 295 |
+
{formatDisplayKey(model.maxEffectCategory)}
|
| 296 |
+
</span>
|
| 297 |
+
</div>
|
| 298 |
+
) : (
|
| 299 |
+
<span className="text-xs text-gray-500">N/A</span>
|
| 300 |
+
)}
|
| 301 |
+
</td>
|
| 302 |
+
<td className="px-3 py-3 text-sm text-center">
|
| 303 |
+
{model.equityConcernPercentage !== null ? (
|
| 304 |
+
<span>
|
| 305 |
+
{model.equityConcernPercentage.toFixed(1)}%
|
| 306 |
+
</span>
|
| 307 |
+
) : (
|
| 308 |
+
<span className="text-xs text-gray-500">N/A</span>
|
| 309 |
+
)}
|
| 310 |
+
</td>
|
| 311 |
+
<td className="px-3 py-3 text-sm">
|
| 312 |
+
{model.repeatUsageScore !== null ? (
|
| 313 |
+
<span
|
| 314 |
+
className={`px-2 py-0.5 rounded-full text-xs font-medium ${getScoreBadgeColor(
|
| 315 |
+
model.repeatUsageScore
|
| 316 |
+
)}`}
|
| 317 |
+
>
|
| 318 |
+
{model.repeatUsageScore.toFixed(1)}%
|
| 319 |
+
</span>
|
| 320 |
+
) : (
|
| 321 |
+
<span className="text-xs text-gray-500">N/A</span>
|
| 322 |
+
)}
|
| 323 |
+
</td>
|
| 324 |
+
</tr>
|
| 325 |
+
))}
|
| 326 |
+
</tbody>
|
| 327 |
+
</table>
|
| 328 |
+
</div>
|
| 329 |
+
{/* UPDATED: Vertical list for column descriptions with detailed info */}
|
| 330 |
+
<div className="mt-4 pt-3 border-t border-gray-200 text-xs text-gray-600">
|
| 331 |
+
{/* Column descriptions in vertical list */}
|
| 332 |
+
<div className="mb-2">
|
| 333 |
+
<div>
|
| 334 |
+
<span className="font-semibold">Overall Score:</span> Avg.
|
| 335 |
+
score across high-level categories
|
| 336 |
+
</div>
|
| 337 |
+
<div>
|
| 338 |
+
<span className="font-semibold">Overall SD:</span> Standard
|
| 339 |
+
deviation across high-level categories (lower = more
|
| 340 |
+
consistent)
|
| 341 |
+
</div>
|
| 342 |
+
<div>
|
| 343 |
+
<span className="font-semibold">Max Equity Gap:</span>{" "}
|
| 344 |
+
Largest demographic score difference (hover for details on
|
| 345 |
+
significance and effect size)
|
| 346 |
+
</div>
|
| 347 |
+
<div>
|
| 348 |
+
<span className="font-semibold">Max Gap Area:</span>{" "}
|
| 349 |
+
Demographic group and Category where the Max Equity Gap
|
| 350 |
+
occurs
|
| 351 |
+
</div>
|
| 352 |
+
<div>
|
| 353 |
+
<span className="font-semibold">Equity Concerns:</span>{" "}
|
| 354 |
+
Percentage of demographic gaps flagged as concerns (large
|
| 355 |
+
effect & statistically significant)
|
| 356 |
+
</div>
|
| 357 |
+
<div>
|
| 358 |
+
<span className="font-semibold">User Retention:</span>{" "}
|
| 359 |
+
Percentage of participants who said they would use the model
|
| 360 |
+
again
|
| 361 |
+
</div>
|
| 362 |
+
</div>
|
| 363 |
+
|
| 364 |
+
{/* Color key on a single line */}
|
| 365 |
+
<div className="mt-2 pt-2 border-t border-gray-100 flex flex-wrap items-center gap-x-4 gap-y-2">
|
| 366 |
+
<span className="font-semibold whitespace-nowrap">
|
| 367 |
+
Color Key:
|
| 368 |
+
</span>
|
| 369 |
+
<div className="flex items-center">
|
| 370 |
+
<span className="inline-block w-4 h-4 rounded-full bg-red-100 mr-1"></span>
|
| 371 |
+
<span>
|
| 372 |
+
Equity Concern (Large Effect & Statistically Significant)
|
| 373 |
+
</span>
|
| 374 |
+
</div>
|
| 375 |
+
<div className="flex items-center">
|
| 376 |
+
<span className="inline-block w-4 h-4 rounded-full bg-yellow-100 mr-1"></span>
|
| 377 |
+
<span>Large Effect (Not Statistically Significant)</span>
|
| 378 |
+
</div>
|
| 379 |
+
</div>
|
| 380 |
+
</div>
|
| 381 |
+
</div>
|
| 382 |
+
</div>
|
| 383 |
+
|
| 384 |
+
{/* Top Performers Section */}
|
| 385 |
+
<div className="mb-4 flex items-center">
|
| 386 |
+
<h3 className="font-semibold text-xl mr-4">
|
| 387 |
+
Top Performers by Category
|
| 388 |
+
</h3>
|
| 389 |
+
<div className="flex space-x-1 p-1 bg-gray-200 rounded-lg">
|
| 390 |
+
<button
|
| 391 |
+
className={`px-4 py-1.5 text-sm font-medium rounded-md transition-colors duration-150 ${
|
| 392 |
+
topPerformersView === "high-level"
|
| 393 |
+
? "bg-white shadow text-blue-600"
|
| 394 |
+
: "text-gray-600 hover:text-gray-800"
|
| 395 |
+
}`}
|
| 396 |
+
onClick={() => setTopPerformersView("high-level")}
|
| 397 |
+
>
|
| 398 |
+
{" "}
|
| 399 |
+
High-Level Categories{" "}
|
| 400 |
+
</button>
|
| 401 |
+
<button
|
| 402 |
+
className={`px-4 py-1.5 text-sm font-medium rounded-md transition-colors duration-150 ${
|
| 403 |
+
topPerformersView === "low-level"
|
| 404 |
+
? "bg-white shadow text-blue-600"
|
| 405 |
+
: "text-gray-600 hover:text-gray-800"
|
| 406 |
+
}`}
|
| 407 |
+
onClick={() => setTopPerformersView("low-level")}
|
| 408 |
+
>
|
| 409 |
+
{" "}
|
| 410 |
+
Low-Level Metrics{" "}
|
| 411 |
+
</button>
|
| 412 |
+
</div>
|
| 413 |
+
</div>
|
| 414 |
+
{/* Top Performers Tables - Access using Title Case keys */}
|
| 415 |
+
{topPerformersView === "high-level" && (
|
| 416 |
+
<div className="border rounded-lg overflow-hidden shadow-sm mb-6">
|
| 417 |
+
<div className="px-4 py-3 bg-gray-50 border-b">
|
| 418 |
+
<h3 className="font-semibold text-gray-800">
|
| 419 |
+
Top Performers by High-Level Category
|
| 420 |
+
</h3>
|
| 421 |
+
</div>
|
| 422 |
+
<div className="p-4">
|
| 423 |
+
{Object.entries(bestPerCategory || {}).length > 0 ? (
|
| 424 |
+
<table className="min-w-full divide-y divide-gray-200">
|
| 425 |
+
<thead>
|
| 426 |
+
<tr>
|
| 427 |
+
<th
|
| 428 |
+
scope="col"
|
| 429 |
+
className="px-3 py-2 text-left text-xs font-medium text-gray-500 uppercase tracking-wider"
|
| 430 |
+
>
|
| 431 |
+
Category
|
| 432 |
+
</th>
|
| 433 |
+
<th
|
| 434 |
+
scope="col"
|
| 435 |
+
className="px-3 py-2 text-left text-xs font-medium text-gray-500 uppercase tracking-wider"
|
| 436 |
+
>
|
| 437 |
+
Best Model
|
| 438 |
+
</th>
|
| 439 |
+
<th
|
| 440 |
+
scope="col"
|
| 441 |
+
className="px-3 py-2 text-left text-xs font-medium text-gray-500 uppercase tracking-wider"
|
| 442 |
+
>
|
| 443 |
+
Score
|
| 444 |
+
</th>
|
| 445 |
+
</tr>
|
| 446 |
+
</thead>
|
| 447 |
+
<tbody className="bg-white divide-y divide-gray-200">
|
| 448 |
+
{Object.entries(bestPerCategory)
|
| 449 |
+
.sort(([a], [b]) => a.localeCompare(b))
|
| 450 |
+
.map(([catDisplayKey, bestInfo], idx) => (
|
| 451 |
+
<tr
|
| 452 |
+
key={catDisplayKey}
|
| 453 |
+
className={
|
| 454 |
+
idx % 2 === 0 ? "bg-white" : "bg-gray-50"
|
| 455 |
+
}
|
| 456 |
+
>
|
| 457 |
+
<td className="px-3 py-2 font-medium text-sm text-gray-900">
|
| 458 |
+
<Tooltip
|
| 459 |
+
content={getMetricTooltip(catDisplayKey)}
|
| 460 |
+
>
|
| 461 |
+
<span>{catDisplayKey}</span>
|
| 462 |
+
</Tooltip>
|
| 463 |
+
</td>
|
| 464 |
+
<td className="px-3 py-2">
|
| 465 |
+
{bestInfo.model !== "N/A" ? (
|
| 466 |
+
<div className="flex items-center">
|
| 467 |
+
<div
|
| 468 |
+
className="w-3 h-3 rounded-full mr-2 shrink-0"
|
| 469 |
+
style={{ backgroundColor: bestInfo.color }}
|
| 470 |
+
></div>
|
| 471 |
+
<span className="text-sm">
|
| 472 |
+
{bestInfo.model}
|
| 473 |
+
</span>
|
| 474 |
+
</div>
|
| 475 |
+
) : (
|
| 476 |
+
<span className="text-sm text-gray-500">
|
| 477 |
+
N/A
|
| 478 |
+
</span>
|
| 479 |
+
)}
|
| 480 |
+
</td>
|
| 481 |
+
<td className="px-3 py-2">
|
| 482 |
+
{bestInfo.score !== null ? (
|
| 483 |
+
<span
|
| 484 |
+
className={`px-2 py-0.5 rounded-full text-xs font-medium ${getScoreBadgeColor(
|
| 485 |
+
bestInfo.score
|
| 486 |
+
)}`}
|
| 487 |
+
>
|
| 488 |
+
{bestInfo.score.toFixed(1)}
|
| 489 |
+
</span>
|
| 490 |
+
) : (
|
| 491 |
+
<span className="text-sm text-gray-500">
|
| 492 |
+
N/A
|
| 493 |
+
</span>
|
| 494 |
+
)}
|
| 495 |
+
</td>
|
| 496 |
+
</tr>
|
| 497 |
+
))}
|
| 498 |
+
</tbody>
|
| 499 |
+
</table>
|
| 500 |
+
) : (
|
| 501 |
+
<p className="text-center text-gray-500 py-4">
|
| 502 |
+
Top performer data not available.
|
| 503 |
+
</p>
|
| 504 |
+
)}
|
| 505 |
+
<p className="text-xs text-gray-500 mt-2">
|
| 506 |
+
Scores based on user ratings, normalized to 0-100.
|
| 507 |
+
</p>
|
| 508 |
+
</div>
|
| 509 |
+
</div>
|
| 510 |
+
)}
|
| 511 |
+
{topPerformersView === "low-level" && (
|
| 512 |
+
<div className="border rounded-lg overflow-hidden shadow-sm mb-6">
|
| 513 |
+
<div className="px-4 py-3 bg-gray-50 border-b">
|
| 514 |
+
<h3 className="font-semibold text-gray-800">
|
| 515 |
+
Top Performers by Low-Level Metric
|
| 516 |
+
</h3>
|
| 517 |
+
</div>
|
| 518 |
+
<div className="p-4">
|
| 519 |
+
{Object.entries(bestPerMetric || {}).length > 0 ? (
|
| 520 |
+
<table className="min-w-full divide-y divide-gray-200">
|
| 521 |
+
<thead>
|
| 522 |
+
<tr>
|
| 523 |
+
<th
|
| 524 |
+
scope="col"
|
| 525 |
+
className="px-3 py-2 text-left text-xs font-medium text-gray-500 uppercase tracking-wider"
|
| 526 |
+
>
|
| 527 |
+
Metric
|
| 528 |
+
</th>
|
| 529 |
+
<th
|
| 530 |
+
scope="col"
|
| 531 |
+
className="px-3 py-2 text-left text-xs font-medium text-gray-500 uppercase tracking-wider"
|
| 532 |
+
>
|
| 533 |
+
Best Model
|
| 534 |
+
</th>
|
| 535 |
+
<th
|
| 536 |
+
scope="col"
|
| 537 |
+
className="px-3 py-2 text-left text-xs font-medium text-gray-500 uppercase tracking-wider"
|
| 538 |
+
>
|
| 539 |
+
Score
|
| 540 |
+
</th>
|
| 541 |
+
</tr>
|
| 542 |
+
</thead>
|
| 543 |
+
<tbody className="bg-white divide-y divide-gray-200">
|
| 544 |
+
{Object.entries(bestPerMetric)
|
| 545 |
+
.sort(([a], [b]) => a.localeCompare(b))
|
| 546 |
+
.map(([metricDisplayKey, bestInfo], idx) => (
|
| 547 |
+
<tr
|
| 548 |
+
key={metricDisplayKey}
|
| 549 |
+
className={
|
| 550 |
+
idx % 2 === 0 ? "bg-white" : "bg-gray-50"
|
| 551 |
+
}
|
| 552 |
+
>
|
| 553 |
+
<td className="px-3 py-2 font-medium text-sm text-gray-900">
|
| 554 |
+
<Tooltip
|
| 555 |
+
content={getMetricTooltip(metricDisplayKey)}
|
| 556 |
+
>
|
| 557 |
+
<span>{metricDisplayKey}</span>
|
| 558 |
+
</Tooltip>
|
| 559 |
+
</td>
|
| 560 |
+
<td className="px-3 py-2">
|
| 561 |
+
{bestInfo.model !== "N/A" ? (
|
| 562 |
+
<div className="flex items-center">
|
| 563 |
+
<div
|
| 564 |
+
className="w-3 h-3 rounded-full mr-2 shrink-0"
|
| 565 |
+
style={{ backgroundColor: bestInfo.color }}
|
| 566 |
+
></div>
|
| 567 |
+
<span className="text-sm">
|
| 568 |
+
{bestInfo.model}
|
| 569 |
+
</span>
|
| 570 |
+
</div>
|
| 571 |
+
) : (
|
| 572 |
+
<span className="text-sm text-gray-500">
|
| 573 |
+
N/A
|
| 574 |
+
</span>
|
| 575 |
+
)}
|
| 576 |
+
</td>
|
| 577 |
+
<td className="px-3 py-2">
|
| 578 |
+
{bestInfo.score !== null ? (
|
| 579 |
+
<span
|
| 580 |
+
className={`px-2 py-0.5 rounded-full text-xs font-medium ${getScoreBadgeColor(
|
| 581 |
+
bestInfo.score
|
| 582 |
+
)}`}
|
| 583 |
+
>
|
| 584 |
+
{bestInfo.score.toFixed(1)}
|
| 585 |
+
</span>
|
| 586 |
+
) : (
|
| 587 |
+
<span className="text-sm text-gray-500">
|
| 588 |
+
N/A
|
| 589 |
+
</span>
|
| 590 |
+
)}
|
| 591 |
+
</td>
|
| 592 |
+
</tr>
|
| 593 |
+
))}
|
| 594 |
+
</tbody>
|
| 595 |
+
</table>
|
| 596 |
+
) : (
|
| 597 |
+
<p className="text-center text-gray-500 py-4">
|
| 598 |
+
Low-level metric top performer data not available.
|
| 599 |
+
</p>
|
| 600 |
+
)}
|
| 601 |
+
<p className="text-xs text-gray-500 mt-2">
|
| 602 |
+
Scores based on user ratings, normalized to 0-100.
|
| 603 |
+
</p>
|
| 604 |
+
</div>
|
| 605 |
+
</div>
|
| 606 |
+
)}
|
| 607 |
+
</div>
|
| 608 |
+
)}{" "}
|
| 609 |
+
{/* End Overview Tab */}
|
| 610 |
+
{/* Other Tabs - Pass Correct Props */}
|
| 611 |
+
{activeTab === "metrics-breakdown" && (
|
| 612 |
+
<MetricsBreakdown
|
| 613 |
+
metricsData={metricsData} // Title Case keys inside, plus internalMetricKey
|
| 614 |
+
modelsMeta={rankedModels} // camelCase keys inside
|
| 615 |
+
radarData={radarData}
|
| 616 |
+
/>
|
| 617 |
+
)}
|
| 618 |
+
{activeTab === "task-performance" && (
|
| 619 |
+
<TaskPerformance
|
| 620 |
+
rawData={rawData} // Contains camelCase top-level, snake_case nested
|
| 621 |
+
modelsMeta={rankedModels}
|
| 622 |
+
metricsData={metricsData} // Title Case keys inside, plus internalMetricKey
|
| 623 |
+
overviewCardData={overviewCardData}
|
| 624 |
+
/>
|
| 625 |
+
)}
|
| 626 |
+
{activeTab === "demographic-analysis" && (
|
| 627 |
+
<DemographicAnalysis
|
| 628 |
+
rawData={rawData} // Contains camelCase top-level, snake_case/Title Case nested
|
| 629 |
+
modelsMeta={rankedModels}
|
| 630 |
+
metricsData={metricsData} // Title Case keys inside, plus internalMetricKey
|
| 631 |
+
equityAnalysis={equityAnalysis} // Original snake_case structure
|
| 632 |
+
/>
|
| 633 |
+
)}
|
| 634 |
+
{activeTab === "about" && <About metadata={metadata} />}
|
| 635 |
+
</div>
|
| 636 |
+
);
|
| 637 |
+
};
|
| 638 |
+
|
| 639 |
+
export default LLMComparisonDashboard;
|
leaderboard-app/components/MetricsBreakdown.jsx
ADDED
|
@@ -0,0 +1,447 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// components/MetricsBreakdown.jsx
|
| 2 |
+
|
| 3 |
+
"use client";
|
| 4 |
+
|
| 5 |
+
import React, { useState, useEffect, useMemo } from "react";
|
| 6 |
+
import {
|
| 7 |
+
RadarChart,
|
| 8 |
+
PolarGrid,
|
| 9 |
+
PolarAngleAxis,
|
| 10 |
+
PolarRadiusAxis,
|
| 11 |
+
Radar,
|
| 12 |
+
Tooltip as RechartsTooltip, // Renamed to avoid conflict with local Tooltip
|
| 13 |
+
Legend,
|
| 14 |
+
ResponsiveContainer,
|
| 15 |
+
} from "recharts";
|
| 16 |
+
import { getScoreColor, getMetricTooltip } from "../lib/utils";
|
| 17 |
+
import { Tooltip } from "./Tooltip"; // Your custom Tooltip component for headers etc.
|
| 18 |
+
|
| 19 |
+
// Component receives processed metrics data, model metadata, and category radar data
|
| 20 |
+
const MetricsBreakdown = ({
|
| 21 |
+
metricsData,
|
| 22 |
+
modelsMeta,
|
| 23 |
+
radarData: categoryRadarDataProp, // Already processed radar data for categories
|
| 24 |
+
}) => {
|
| 25 |
+
const [subTab, setSubTab] = useState("categories"); // 'categories' or 'metrics'
|
| 26 |
+
const [selectedModels, setSelectedModels] = useState([]);
|
| 27 |
+
|
| 28 |
+
// console.log("Metrics Data in Breakdown:", metricsData); // For debugging
|
| 29 |
+
// console.log("Models Meta in Breakdown:", modelsMeta);
|
| 30 |
+
// console.log("Category Radar Data Prop:", categoryRadarDataProp);
|
| 31 |
+
|
| 32 |
+
// Extract data from props with defaults
|
| 33 |
+
const { highLevelCategories, lowLevelMetrics } = metricsData || {
|
| 34 |
+
highLevelCategories: {},
|
| 35 |
+
lowLevelMetrics: {},
|
| 36 |
+
};
|
| 37 |
+
// Use modelsMeta directly for clarity, aliasing if preferred
|
| 38 |
+
const models = modelsMeta || [];
|
| 39 |
+
|
| 40 |
+
// Get sorted lists of category and metric names
|
| 41 |
+
const sortedCategoryNames = useMemo(
|
| 42 |
+
() =>
|
| 43 |
+
Object.keys(highLevelCategories || {}).sort((a, b) => a.localeCompare(b)),
|
| 44 |
+
[highLevelCategories]
|
| 45 |
+
);
|
| 46 |
+
const sortedMetricNames = useMemo(
|
| 47 |
+
() => Object.keys(lowLevelMetrics || {}).sort((a, b) => a.localeCompare(b)),
|
| 48 |
+
[lowLevelMetrics]
|
| 49 |
+
);
|
| 50 |
+
|
| 51 |
+
// Initialize selections
|
| 52 |
+
useEffect(() => {
|
| 53 |
+
if (selectedModels.length === 0 && models.length > 0) {
|
| 54 |
+
setSelectedModels(models.map((m) => m.model));
|
| 55 |
+
}
|
| 56 |
+
// eslint-disable-next-line react-hooks/exhaustive-deps
|
| 57 |
+
}, [models]); // Only depends on models changing/loading
|
| 58 |
+
|
| 59 |
+
// --- Memoized data generation functions ---
|
| 60 |
+
|
| 61 |
+
// Radar data for LL Metrics (used when subTab === 'metrics') - CORRECTED ACCESSORS
|
| 62 |
+
const metricRadarData = useMemo(() => {
|
| 63 |
+
if (
|
| 64 |
+
!lowLevelMetrics ||
|
| 65 |
+
models.length === 0 ||
|
| 66 |
+
sortedMetricNames.length === 0
|
| 67 |
+
)
|
| 68 |
+
return [];
|
| 69 |
+
return sortedMetricNames.map((metricName) => {
|
| 70 |
+
const entry = { category: metricName }; // Use metric name as the axis category
|
| 71 |
+
const metricData = lowLevelMetrics[metricName];
|
| 72 |
+
if (metricData) {
|
| 73 |
+
models
|
| 74 |
+
.filter((m) => selectedModels.includes(m.model))
|
| 75 |
+
.forEach((model) => {
|
| 76 |
+
// Use correct camelCase keys
|
| 77 |
+
entry[model.model] =
|
| 78 |
+
Number(metricData.modelScores?.[model.model]?.nationalScore) || 0;
|
| 79 |
+
// Standard deviation per metric is NOT available, so we don't add it here
|
| 80 |
+
});
|
| 81 |
+
}
|
| 82 |
+
return entry;
|
| 83 |
+
});
|
| 84 |
+
}, [lowLevelMetrics, models, selectedModels, sortedMetricNames]);
|
| 85 |
+
|
| 86 |
+
// Custom tooltip (common for both radar charts) - CORRECTED (removed std dev logic)
|
| 87 |
+
const CustomRadarTooltip = ({ active, payload, label }) => {
|
| 88 |
+
if (active && payload && payload.length) {
|
| 89 |
+
return (
|
| 90 |
+
<div className="bg-white p-3 border rounded shadow-lg max-w-xs opacity-95">
|
| 91 |
+
<p className="font-medium mb-1 text-gray-800">{label}</p>
|
| 92 |
+
{/* Get tooltip description for the category/metric itself */}
|
| 93 |
+
<p className="text-xs mb-3 text-gray-600 border-b pb-2">
|
| 94 |
+
{getMetricTooltip(label)}
|
| 95 |
+
</p>
|
| 96 |
+
<div className="space-y-1">
|
| 97 |
+
{payload
|
| 98 |
+
// Sort models by score within tooltip
|
| 99 |
+
.sort((a, b) => (b.value || 0) - (a.value || 0))
|
| 100 |
+
.map((entry) => (
|
| 101 |
+
<div
|
| 102 |
+
key={entry.dataKey} // dataKey is the model name here
|
| 103 |
+
className="flex items-center text-sm"
|
| 104 |
+
>
|
| 105 |
+
<div
|
| 106 |
+
className="w-2.5 h-2.5 rounded-full mr-2 flex-shrink-0"
|
| 107 |
+
style={{ backgroundColor: entry.color || "#8884d8" }}
|
| 108 |
+
></div>
|
| 109 |
+
<span className="mr-1 truncate flex-grow text-gray-700">
|
| 110 |
+
{entry.name}: {/* name is also the model name */}
|
| 111 |
+
</span>
|
| 112 |
+
<span className="font-medium flex-shrink-0 text-gray-900">
|
| 113 |
+
{/* Ensure value exists and format */}
|
| 114 |
+
{entry.value !== null && entry.value !== undefined
|
| 115 |
+
? Number(entry.value).toFixed(1)
|
| 116 |
+
: "N/A"}
|
| 117 |
+
{/* Removed standard deviation display */}
|
| 118 |
+
</span>
|
| 119 |
+
</div>
|
| 120 |
+
))}
|
| 121 |
+
</div>
|
| 122 |
+
</div>
|
| 123 |
+
);
|
| 124 |
+
}
|
| 125 |
+
return null;
|
| 126 |
+
};
|
| 127 |
+
|
| 128 |
+
// Use the radar data passed via prop for categories view, filtered by selected models - CORRECTED (removed std dev logic)
|
| 129 |
+
const filteredCategoryRadarData = useMemo(() => {
|
| 130 |
+
if (!categoryRadarDataProp || models.length === 0) return [];
|
| 131 |
+
// Filter based on selected models, removing std dev keys
|
| 132 |
+
return categoryRadarDataProp.map((item) => {
|
| 133 |
+
const newItem = { category: item.category };
|
| 134 |
+
models
|
| 135 |
+
.filter((m) => selectedModels.includes(m.model))
|
| 136 |
+
.forEach((model) => {
|
| 137 |
+
// We only need the model score itself for the radar data
|
| 138 |
+
newItem[model.model] = item[model.model] ?? 0; // Use nullish coalescing for default
|
| 139 |
+
});
|
| 140 |
+
return newItem;
|
| 141 |
+
});
|
| 142 |
+
}, [categoryRadarDataProp, models, selectedModels]);
|
| 143 |
+
|
| 144 |
+
return (
|
| 145 |
+
<>
|
| 146 |
+
{/* Top Controls: Model Selector & Sub-Tab Pills (No changes needed) */}
|
| 147 |
+
<div className="mb-6 flex flex-col md:flex-row justify-between items-center gap-4">
|
| 148 |
+
{/* Sub-Tab Pills */}
|
| 149 |
+
<div className="flex space-x-1 p-1 bg-gray-200 rounded-lg">
|
| 150 |
+
{" "}
|
| 151 |
+
<button
|
| 152 |
+
aria-pressed={subTab === "categories"}
|
| 153 |
+
className={`px-4 py-1.5 text-sm font-medium rounded-md transition-colors duration-150 ${
|
| 154 |
+
subTab === "categories"
|
| 155 |
+
? "bg-white shadow text-blue-600"
|
| 156 |
+
: "text-gray-600 hover:text-gray-800"
|
| 157 |
+
}`}
|
| 158 |
+
onClick={() => setSubTab("categories")}
|
| 159 |
+
>
|
| 160 |
+
{" "}
|
| 161 |
+
High-Level Categories{" "}
|
| 162 |
+
</button>{" "}
|
| 163 |
+
<button
|
| 164 |
+
aria-pressed={subTab === "metrics"}
|
| 165 |
+
className={`px-4 py-1.5 text-sm font-medium rounded-md transition-colors duration-150 ${
|
| 166 |
+
subTab === "metrics"
|
| 167 |
+
? "bg-white shadow text-blue-600"
|
| 168 |
+
: "text-gray-600 hover:text-gray-800"
|
| 169 |
+
}`}
|
| 170 |
+
onClick={() => setSubTab("metrics")}
|
| 171 |
+
>
|
| 172 |
+
{" "}
|
| 173 |
+
Low-Level Metrics{" "}
|
| 174 |
+
</button>{" "}
|
| 175 |
+
</div>
|
| 176 |
+
{/* Model Selector */}
|
| 177 |
+
<div className="flex items-center flex-wrap gap-1">
|
| 178 |
+
{" "}
|
| 179 |
+
<span className="text-sm text-gray-500 mr-2">Models:</span>{" "}
|
| 180 |
+
{models?.map((model) => (
|
| 181 |
+
<button
|
| 182 |
+
key={model.model}
|
| 183 |
+
className={`px-2 py-0.5 text-xs rounded border ${
|
| 184 |
+
selectedModels.includes(model.model)
|
| 185 |
+
? "bg-sky-100 text-sky-800 border-sky-300 font-medium"
|
| 186 |
+
: "bg-gray-100 text-gray-600 border-gray-300 hover:bg-gray-200"
|
| 187 |
+
}`}
|
| 188 |
+
onClick={() => {
|
| 189 |
+
if (selectedModels.includes(model.model)) {
|
| 190 |
+
if (selectedModels.length > 1) {
|
| 191 |
+
setSelectedModels(
|
| 192 |
+
selectedModels.filter((m) => m !== model.model)
|
| 193 |
+
);
|
| 194 |
+
}
|
| 195 |
+
} else {
|
| 196 |
+
setSelectedModels([...selectedModels, model.model]);
|
| 197 |
+
}
|
| 198 |
+
}}
|
| 199 |
+
>
|
| 200 |
+
{" "}
|
| 201 |
+
{model.model}{" "}
|
| 202 |
+
</button>
|
| 203 |
+
))}{" "}
|
| 204 |
+
</div>
|
| 205 |
+
</div>
|
| 206 |
+
|
| 207 |
+
{/* Conditional content based on sub-tab */}
|
| 208 |
+
{subTab === "categories" && (
|
| 209 |
+
<div className="space-y-6">
|
| 210 |
+
{/* CATEGORIES VIEW */}
|
| 211 |
+
{/* Summary Table: Models as Rows, Categories as Columns - CORRECTED ACCESSORS */}
|
| 212 |
+
<div className="border rounded-lg overflow-hidden shadow-sm">
|
| 213 |
+
<div className="px-4 py-3 bg-gray-50 border-b">
|
| 214 |
+
<h3 className="font-semibold text-gray-800">
|
| 215 |
+
Category Performance Summary
|
| 216 |
+
</h3>
|
| 217 |
+
</div>
|
| 218 |
+
<div className="p-4 overflow-x-auto">
|
| 219 |
+
{sortedCategoryNames.length > 0 ? (
|
| 220 |
+
<table className="min-w-full divide-y divide-gray-200 border border-gray-200">
|
| 221 |
+
<thead>
|
| 222 |
+
<tr className="bg-gray-100">
|
| 223 |
+
<th
|
| 224 |
+
scope="col"
|
| 225 |
+
className="sticky left-0 bg-gray-100 px-3 py-2 text-left text-xs font-semibold text-gray-600 uppercase tracking-wider z-10"
|
| 226 |
+
>
|
| 227 |
+
Model
|
| 228 |
+
</th>
|
| 229 |
+
{sortedCategoryNames.map((catName) => (
|
| 230 |
+
<th
|
| 231 |
+
key={catName}
|
| 232 |
+
scope="col"
|
| 233 |
+
className="px-3 py-2 text-left text-xs font-semibold text-gray-600 uppercase tracking-wider whitespace-nowrap"
|
| 234 |
+
>
|
| 235 |
+
{catName}
|
| 236 |
+
</th>
|
| 237 |
+
))}
|
| 238 |
+
</tr>
|
| 239 |
+
</thead>
|
| 240 |
+
<tbody className="bg-white divide-y divide-gray-200">
|
| 241 |
+
{models
|
| 242 |
+
?.filter((m) => selectedModels.includes(m.model))
|
| 243 |
+
.map((model, idx) => (
|
| 244 |
+
<tr
|
| 245 |
+
key={model.model}
|
| 246 |
+
className={
|
| 247 |
+
idx % 2 === 0
|
| 248 |
+
? "bg-white hover:bg-gray-50"
|
| 249 |
+
: "bg-gray-50 hover:bg-gray-100"
|
| 250 |
+
}
|
| 251 |
+
>
|
| 252 |
+
<td className="sticky left-0 bg-inherit px-3 py-2 whitespace-nowrap z-10 text-left">
|
| 253 |
+
{" "}
|
| 254 |
+
{/* Keep sticky styles */}
|
| 255 |
+
<div className="flex items-center">
|
| 256 |
+
<div
|
| 257 |
+
className="w-3 h-3 rounded-full mr-2 shrink-0"
|
| 258 |
+
style={{ backgroundColor: model.color }}
|
| 259 |
+
></div>
|
| 260 |
+
<span className="text-sm font-medium">
|
| 261 |
+
{model.model}
|
| 262 |
+
</span>
|
| 263 |
+
</div>
|
| 264 |
+
</td>
|
| 265 |
+
{sortedCategoryNames.map((catName) => {
|
| 266 |
+
// Use correct camelCase keys
|
| 267 |
+
const scoreData =
|
| 268 |
+
highLevelCategories[catName]?.modelScores?.[
|
| 269 |
+
model.model
|
| 270 |
+
];
|
| 271 |
+
const score = scoreData?.nationalScore; // Access camelCase key
|
| 272 |
+
const displayScore =
|
| 273 |
+
score !== null && score !== undefined
|
| 274 |
+
? Number(score).toFixed(1)
|
| 275 |
+
: "N/A";
|
| 276 |
+
return (
|
| 277 |
+
<td
|
| 278 |
+
key={catName}
|
| 279 |
+
className="px-3 py-2 whitespace-nowrap text-center"
|
| 280 |
+
>
|
| 281 |
+
<div
|
| 282 |
+
className={`text-sm ${
|
| 283 |
+
displayScore === "N/A"
|
| 284 |
+
? "text-gray-400"
|
| 285 |
+
: getScoreColor(score)
|
| 286 |
+
}`}
|
| 287 |
+
>
|
| 288 |
+
{displayScore}
|
| 289 |
+
</div>
|
| 290 |
+
</td>
|
| 291 |
+
);
|
| 292 |
+
})}
|
| 293 |
+
</tr>
|
| 294 |
+
))}
|
| 295 |
+
</tbody>
|
| 296 |
+
</table>
|
| 297 |
+
) : (
|
| 298 |
+
<p className="text-center text-gray-500 py-4">
|
| 299 |
+
No category data available.
|
| 300 |
+
</p>
|
| 301 |
+
)}
|
| 302 |
+
</div>
|
| 303 |
+
</div>
|
| 304 |
+
|
| 305 |
+
{/* Radar Chart for Categories (Uses filteredCategoryRadarData) */}
|
| 306 |
+
<div className="border rounded-lg overflow-hidden shadow-sm">
|
| 307 |
+
<div className="px-4 py-3 bg-gray-50 border-b flex justify-between items-center">
|
| 308 |
+
<h3 className="font-semibold text-gray-800">
|
| 309 |
+
Performance Across Categories
|
| 310 |
+
</h3>
|
| 311 |
+
<div className="text-xs text-gray-500">
|
| 312 |
+
National Average Scores
|
| 313 |
+
</div>
|
| 314 |
+
</div>
|
| 315 |
+
<div className="p-4">
|
| 316 |
+
{filteredCategoryRadarData &&
|
| 317 |
+
filteredCategoryRadarData.length > 0 ? (
|
| 318 |
+
<div className="h-96 md:h-[450px]">
|
| 319 |
+
<ResponsiveContainer width="100%" height="100%">
|
| 320 |
+
<RadarChart
|
| 321 |
+
outerRadius="80%"
|
| 322 |
+
data={filteredCategoryRadarData}
|
| 323 |
+
>
|
| 324 |
+
<PolarGrid gridType="polygon" stroke="#e5e7eb" />
|
| 325 |
+
<PolarAngleAxis
|
| 326 |
+
dataKey="category"
|
| 327 |
+
tick={{ fill: "#4b5563", fontSize: 12 }}
|
| 328 |
+
/>
|
| 329 |
+
<PolarRadiusAxis
|
| 330 |
+
angle={90}
|
| 331 |
+
domain={[0, 100]}
|
| 332 |
+
axisLine={false}
|
| 333 |
+
tick={{ fill: "#6b7280", fontSize: 10 }}
|
| 334 |
+
/>
|
| 335 |
+
{models
|
| 336 |
+
?.filter((m) => selectedModels.includes(m.model))
|
| 337 |
+
.map((model) => (
|
| 338 |
+
<Radar
|
| 339 |
+
key={model.model}
|
| 340 |
+
name={model.model}
|
| 341 |
+
dataKey={model.model}
|
| 342 |
+
stroke={model.color}
|
| 343 |
+
fill={model.color}
|
| 344 |
+
fillOpacity={0.1}
|
| 345 |
+
strokeWidth={2}
|
| 346 |
+
/>
|
| 347 |
+
))}
|
| 348 |
+
{/* Use the corrected CustomRadarTooltip */}
|
| 349 |
+
<RechartsTooltip content={<CustomRadarTooltip />} />
|
| 350 |
+
<Legend
|
| 351 |
+
iconSize={10}
|
| 352 |
+
wrapperStyle={{ fontSize: "12px", paddingTop: "20px" }}
|
| 353 |
+
/>
|
| 354 |
+
</RadarChart>
|
| 355 |
+
</ResponsiveContainer>
|
| 356 |
+
</div>
|
| 357 |
+
) : (
|
| 358 |
+
<p className="text-center text-gray-500 py-4">
|
| 359 |
+
Radar data not available.
|
| 360 |
+
</p>
|
| 361 |
+
)}
|
| 362 |
+
<p className="text-xs text-gray-500 mt-4">
|
| 363 |
+
This radar chart visualizes how each model performs across
|
| 364 |
+
different high-level evaluation categories. The further out on
|
| 365 |
+
each axis, the better the performance on that category.
|
| 366 |
+
</p>
|
| 367 |
+
</div>
|
| 368 |
+
</div>
|
| 369 |
+
</div>
|
| 370 |
+
)}
|
| 371 |
+
|
| 372 |
+
{subTab === "metrics" && (
|
| 373 |
+
<div className="space-y-6">
|
| 374 |
+
{/* METRICS VIEW */}
|
| 375 |
+
{/* Radar Chart for Metrics (Uses metricRadarData) */}
|
| 376 |
+
<div className="border rounded-lg overflow-hidden shadow-sm">
|
| 377 |
+
<div className="px-4 py-3 bg-gray-50 border-b flex justify-between items-center">
|
| 378 |
+
<h3 className="font-semibold text-gray-800">
|
| 379 |
+
Performance Across All Metrics
|
| 380 |
+
</h3>
|
| 381 |
+
<div className="text-xs text-gray-500">
|
| 382 |
+
National Average Scores
|
| 383 |
+
</div>
|
| 384 |
+
</div>
|
| 385 |
+
<div className="p-4">
|
| 386 |
+
{metricRadarData.length > 0 ? (
|
| 387 |
+
<div className="h-96 md:h-[600px]">
|
| 388 |
+
{" "}
|
| 389 |
+
{/* Increased height */}
|
| 390 |
+
<ResponsiveContainer width="100%" height="100%">
|
| 391 |
+
<RadarChart outerRadius="80%" data={metricRadarData}>
|
| 392 |
+
{" "}
|
| 393 |
+
{/* Use metricRadarData */}
|
| 394 |
+
<PolarGrid gridType="polygon" stroke="#e5e7eb" />
|
| 395 |
+
<PolarAngleAxis
|
| 396 |
+
dataKey="category"
|
| 397 |
+
tick={{ fill: "#4b5563", fontSize: 10 }}
|
| 398 |
+
/>{" "}
|
| 399 |
+
{/* Adjusted font size */}
|
| 400 |
+
<PolarRadiusAxis
|
| 401 |
+
angle={90}
|
| 402 |
+
domain={[0, 100]}
|
| 403 |
+
axisLine={false}
|
| 404 |
+
tick={{ fill: "#6b7280", fontSize: 10 }}
|
| 405 |
+
/>
|
| 406 |
+
{models
|
| 407 |
+
?.filter((m) => selectedModels.includes(m.model))
|
| 408 |
+
.map((model) => (
|
| 409 |
+
<Radar
|
| 410 |
+
key={model.model}
|
| 411 |
+
name={model.model}
|
| 412 |
+
dataKey={model.model}
|
| 413 |
+
stroke={model.color}
|
| 414 |
+
fill={model.color}
|
| 415 |
+
fillOpacity={0.1}
|
| 416 |
+
strokeWidth={2}
|
| 417 |
+
/>
|
| 418 |
+
))}
|
| 419 |
+
{/* Use the corrected CustomRadarTooltip */}
|
| 420 |
+
<RechartsTooltip content={<CustomRadarTooltip />} />
|
| 421 |
+
<Legend
|
| 422 |
+
iconSize={10}
|
| 423 |
+
wrapperStyle={{ fontSize: "12px", paddingTop: "20px" }}
|
| 424 |
+
/>
|
| 425 |
+
</RadarChart>
|
| 426 |
+
</ResponsiveContainer>
|
| 427 |
+
</div>
|
| 428 |
+
) : (
|
| 429 |
+
<p className="text-center text-gray-500 py-4">
|
| 430 |
+
Metric data not available for radar chart.
|
| 431 |
+
</p>
|
| 432 |
+
)}
|
| 433 |
+
<p className="text-xs text-gray-500 mt-4">
|
| 434 |
+
This radar chart visualizes how each model performs across
|
| 435 |
+
different low-level metrics. The further out on each axis, the
|
| 436 |
+
better the performance on that metric.
|
| 437 |
+
</p>
|
| 438 |
+
</div>
|
| 439 |
+
</div>
|
| 440 |
+
{/* Optional: Add a table summary for low-level metrics similar to the categories one if desired */}
|
| 441 |
+
</div>
|
| 442 |
+
)}
|
| 443 |
+
</>
|
| 444 |
+
);
|
| 445 |
+
};
|
| 446 |
+
|
| 447 |
+
export default MetricsBreakdown;
|
leaderboard-app/components/TaskPerformance.jsx
ADDED
|
@@ -0,0 +1,756 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// components/TaskPerformance.jsx
|
| 2 |
+
|
| 3 |
+
"use client";
|
| 4 |
+
|
| 5 |
+
import React, { useState, useMemo, useEffect } from "react";
|
| 6 |
+
import {
|
| 7 |
+
BarChart,
|
| 8 |
+
Bar,
|
| 9 |
+
XAxis,
|
| 10 |
+
YAxis,
|
| 11 |
+
CartesianGrid,
|
| 12 |
+
Tooltip as RechartsTooltip,
|
| 13 |
+
ResponsiveContainer,
|
| 14 |
+
Cell,
|
| 15 |
+
} from "recharts";
|
| 16 |
+
import {
|
| 17 |
+
getMetricTooltip,
|
| 18 |
+
getScoreBadgeColor,
|
| 19 |
+
formatDisplayKey,
|
| 20 |
+
camelToTitle,
|
| 21 |
+
} from "../lib/utils"; // Import formatDisplayKey
|
| 22 |
+
|
| 23 |
+
// Helper component for info tooltips
|
| 24 |
+
const InfoTooltip = ({ text }) => {
|
| 25 |
+
/* ... (no change) ... */
|
| 26 |
+
const [isVisible, setIsVisible] = useState(false);
|
| 27 |
+
return (
|
| 28 |
+
<div className="relative inline-block ml-1 align-middle">
|
| 29 |
+
<button
|
| 30 |
+
className="text-gray-400 hover:text-gray-600 focus:outline-none"
|
| 31 |
+
onMouseEnter={() => setIsVisible(true)}
|
| 32 |
+
onMouseLeave={() => setIsVisible(false)}
|
| 33 |
+
onClick={(e) => {
|
| 34 |
+
e.stopPropagation();
|
| 35 |
+
setIsVisible(!isVisible);
|
| 36 |
+
}}
|
| 37 |
+
aria-label="Info"
|
| 38 |
+
>
|
| 39 |
+
<svg
|
| 40 |
+
xmlns="http://www.w3.org/2000/svg"
|
| 41 |
+
className="h-4 w-4"
|
| 42 |
+
viewBox="0 0 20 20"
|
| 43 |
+
fill="currentColor"
|
| 44 |
+
>
|
| 45 |
+
<path
|
| 46 |
+
fillRule="evenodd"
|
| 47 |
+
d="M18 10a8 8 0 11-16 0 8 8 0 0116 0zm-7-4a1 1 0 11-2 0 1 1 0 012 0zM9 9a1 1 0 000 2v3a1 1 0 001 1h1a1 1 0 100-2v-3a1 1 0 00-1-1H9z"
|
| 48 |
+
clipRule="evenodd"
|
| 49 |
+
/>
|
| 50 |
+
</svg>{" "}
|
| 51 |
+
</button>{" "}
|
| 52 |
+
{isVisible && (
|
| 53 |
+
<div className="absolute z-10 w-64 p-2 bg-white border rounded shadow-lg text-xs text-gray-700 -translate-x-1/2 left-1/2 mt-1">
|
| 54 |
+
{text}
|
| 55 |
+
</div>
|
| 56 |
+
)}{" "}
|
| 57 |
+
</div>
|
| 58 |
+
);
|
| 59 |
+
};
|
| 60 |
+
|
| 61 |
+
// Custom tooltip for charts
|
| 62 |
+
const CustomTooltip = ({ active, payload, label }) => {
|
| 63 |
+
/* ... (no change needed) ... */
|
| 64 |
+
if (active && payload && payload.length) {
|
| 65 |
+
const sortedPayload = [...payload].sort(
|
| 66 |
+
(a, b) => (b.value || 0) - (a.value || 0)
|
| 67 |
+
);
|
| 68 |
+
return (
|
| 69 |
+
<div className="bg-white p-3 border rounded shadow-lg max-w-xs">
|
| 70 |
+
<p className="font-medium text-sm">{label}</p>{" "}
|
| 71 |
+
{sortedPayload.map((entry, index) => (
|
| 72 |
+
<div key={`item-${index}`} className="flex items-center mt-1">
|
| 73 |
+
<div
|
| 74 |
+
className="w-3 h-3 mr-2 rounded-full flex-shrink-0"
|
| 75 |
+
style={{
|
| 76 |
+
backgroundColor:
|
| 77 |
+
entry.payload?.color || entry.color || "#8884d8",
|
| 78 |
+
}}
|
| 79 |
+
></div>{" "}
|
| 80 |
+
<span className="text-xs flex-grow pr-2">{entry.name}: </span>{" "}
|
| 81 |
+
<span className="text-xs font-medium ml-1 whitespace-nowrap">
|
| 82 |
+
{typeof entry.value === "number" ? entry.value.toFixed(1) : "N/A"}
|
| 83 |
+
</span>{" "}
|
| 84 |
+
</div>
|
| 85 |
+
))}{" "}
|
| 86 |
+
</div>
|
| 87 |
+
);
|
| 88 |
+
}
|
| 89 |
+
return null;
|
| 90 |
+
};
|
| 91 |
+
|
| 92 |
+
// Tab component
|
| 93 |
+
const TabButton = ({ active, onClick, children }) => (
|
| 94 |
+
<button
|
| 95 |
+
aria-pressed={active}
|
| 96 |
+
className={`px-4 py-1.5 text-sm font-medium rounded-md transition-colors duration-150 ${
|
| 97 |
+
active
|
| 98 |
+
? "bg-white shadow text-blue-600"
|
| 99 |
+
: "text-gray-600 hover:text-gray-800"
|
| 100 |
+
}`}
|
| 101 |
+
onClick={onClick}
|
| 102 |
+
>
|
| 103 |
+
{children}{" "}
|
| 104 |
+
</button>
|
| 105 |
+
);
|
| 106 |
+
|
| 107 |
+
// Main component
|
| 108 |
+
const TaskPerformance = ({
|
| 109 |
+
rawData,
|
| 110 |
+
modelsMeta,
|
| 111 |
+
metricsData, // Expects Title Case keys (e.g., Context Memory) containing internalMetricKey
|
| 112 |
+
overviewCardData,
|
| 113 |
+
}) => {
|
| 114 |
+
const [activeTab, setActiveTab] = useState("top-performers");
|
| 115 |
+
|
| 116 |
+
// *** Use Title Case metric keys from processed metricsData ***
|
| 117 |
+
const highLevelMetricDisplayKeys = useMemo(
|
| 118 |
+
() => Object.keys(metricsData?.highLevelCategories || {}).sort(),
|
| 119 |
+
[metricsData?.highLevelCategories]
|
| 120 |
+
);
|
| 121 |
+
const lowLevelMetricDisplayKeys = useMemo(
|
| 122 |
+
() => Object.keys(metricsData?.lowLevelMetrics || {}).sort(),
|
| 123 |
+
[metricsData?.lowLevelMetrics]
|
| 124 |
+
);
|
| 125 |
+
// **************************************************************
|
| 126 |
+
|
| 127 |
+
// Access original snake_case keys from rawData
|
| 128 |
+
const { taskLevelPerformance = {}, tasks = [] } = rawData || {};
|
| 129 |
+
const { bestModelPerTask = {} } = overviewCardData || {};
|
| 130 |
+
const models = modelsMeta || [];
|
| 131 |
+
|
| 132 |
+
// State for 'Model Performance' tab
|
| 133 |
+
const [selectedTask, setSelectedTask] = useState(
|
| 134 |
+
tasks.length > 0 ? tasks[0] : "all"
|
| 135 |
+
);
|
| 136 |
+
const [selectedMetricType, setSelectedMetricType] = useState("high");
|
| 137 |
+
// *** selectedMetric now stores the Title Case display key ***
|
| 138 |
+
const [selectedMetricDisplayKey, setSelectedMetricDisplayKey] = useState("");
|
| 139 |
+
// ***********************************************************
|
| 140 |
+
const [selectedModels, setSelectedModels] = useState([]);
|
| 141 |
+
|
| 142 |
+
// Determine current metrics list (Title Case display keys)
|
| 143 |
+
const currentMetricDisplayKeysList = useMemo(
|
| 144 |
+
() =>
|
| 145 |
+
selectedMetricType === "high"
|
| 146 |
+
? highLevelMetricDisplayKeys
|
| 147 |
+
: lowLevelMetricDisplayKeys,
|
| 148 |
+
[selectedMetricType, highLevelMetricDisplayKeys, lowLevelMetricDisplayKeys]
|
| 149 |
+
);
|
| 150 |
+
|
| 151 |
+
// Load models on mount
|
| 152 |
+
useEffect(() => {
|
| 153 |
+
if (models.length > 0 && selectedModels.length === 0) {
|
| 154 |
+
setSelectedModels(models.map((m) => m.model));
|
| 155 |
+
}
|
| 156 |
+
}, [models, selectedModels.length]);
|
| 157 |
+
|
| 158 |
+
// Set default metric display key when the list or type changes
|
| 159 |
+
useEffect(() => {
|
| 160 |
+
if (currentMetricDisplayKeysList.length > 0) {
|
| 161 |
+
if (
|
| 162 |
+
!selectedMetricDisplayKey ||
|
| 163 |
+
!currentMetricDisplayKeysList.includes(selectedMetricDisplayKey)
|
| 164 |
+
) {
|
| 165 |
+
setSelectedMetricDisplayKey(currentMetricDisplayKeysList[0]); // Set to the first Title Case key
|
| 166 |
+
}
|
| 167 |
+
} else {
|
| 168 |
+
setSelectedMetricDisplayKey("");
|
| 169 |
+
}
|
| 170 |
+
}, [currentMetricDisplayKeysList, selectedMetricDisplayKey]);
|
| 171 |
+
|
| 172 |
+
// Prep chart data - *** UPDATED to use internalMetricKey looked up via selectedMetricDisplayKey ***
|
| 173 |
+
const chartData = useMemo(() => {
|
| 174 |
+
if (
|
| 175 |
+
!taskLevelPerformance ||
|
| 176 |
+
!selectedMetricDisplayKey ||
|
| 177 |
+
selectedModels.length === 0
|
| 178 |
+
)
|
| 179 |
+
return [];
|
| 180 |
+
|
| 181 |
+
// Find the internal snake_case key using the selected Title Case display name
|
| 182 |
+
const allMetricsProcessed = {
|
| 183 |
+
...(metricsData?.highLevelCategories || {}),
|
| 184 |
+
...(metricsData?.lowLevelMetrics || {}),
|
| 185 |
+
};
|
| 186 |
+
const metricInfo = allMetricsProcessed[selectedMetricDisplayKey]; // Look up using Title Case key
|
| 187 |
+
const internalMetricKey = metricInfo?.internalMetricKey; // Access the stored snake_case key
|
| 188 |
+
|
| 189 |
+
if (!internalMetricKey) {
|
| 190 |
+
console.warn(
|
| 191 |
+
`Could not find internal key for selected metric: ${selectedMetricDisplayKey}`
|
| 192 |
+
);
|
| 193 |
+
return [];
|
| 194 |
+
}
|
| 195 |
+
|
| 196 |
+
let data = [];
|
| 197 |
+
if (selectedTask === "all") {
|
| 198 |
+
const modelAggregates = {};
|
| 199 |
+
tasks.forEach((task) => {
|
| 200 |
+
if (taskLevelPerformance[task]) {
|
| 201 |
+
Object.entries(taskLevelPerformance[task]).forEach(
|
| 202 |
+
([model, metrics]) => {
|
| 203 |
+
if (selectedModels.includes(model)) {
|
| 204 |
+
// *** Use the FOUND snake_case internalMetricKey ***
|
| 205 |
+
const score = metrics?.[internalMetricKey];
|
| 206 |
+
if (score !== undefined && score !== null && score !== "N/A") {
|
| 207 |
+
const numScore = parseFloat(score);
|
| 208 |
+
if (!isNaN(numScore)) {
|
| 209 |
+
if (!modelAggregates[model])
|
| 210 |
+
modelAggregates[model] = { sum: 0, count: 0 };
|
| 211 |
+
modelAggregates[model].sum += numScore;
|
| 212 |
+
modelAggregates[model].count++;
|
| 213 |
+
}
|
| 214 |
+
}
|
| 215 |
+
}
|
| 216 |
+
}
|
| 217 |
+
);
|
| 218 |
+
}
|
| 219 |
+
});
|
| 220 |
+
data = Object.entries(modelAggregates).map(([model, aggregates]) => {
|
| 221 |
+
const modelMeta = models.find((m) => m.model === model) || {};
|
| 222 |
+
return {
|
| 223 |
+
model: model,
|
| 224 |
+
score:
|
| 225 |
+
aggregates.count > 0 ? aggregates.sum / aggregates.count : null,
|
| 226 |
+
color: modelMeta.color || "#999999",
|
| 227 |
+
};
|
| 228 |
+
});
|
| 229 |
+
} else if (taskLevelPerformance[selectedTask]) {
|
| 230 |
+
data = Object.entries(taskLevelPerformance[selectedTask])
|
| 231 |
+
.filter(([model, _metrics]) => selectedModels.includes(model))
|
| 232 |
+
.map(([model, metrics]) => {
|
| 233 |
+
// *** Use the FOUND snake_case internalMetricKey ***
|
| 234 |
+
const score = metrics?.[internalMetricKey];
|
| 235 |
+
const modelMeta = models.find((m) => m.model === model) || {};
|
| 236 |
+
return {
|
| 237 |
+
model: model,
|
| 238 |
+
score:
|
| 239 |
+
score !== undefined && score !== null && score !== "N/A"
|
| 240 |
+
? parseFloat(score)
|
| 241 |
+
: null,
|
| 242 |
+
color: modelMeta.color || "#999999",
|
| 243 |
+
};
|
| 244 |
+
});
|
| 245 |
+
}
|
| 246 |
+
|
| 247 |
+
return data
|
| 248 |
+
.filter((item) => item.score !== null && !isNaN(item.score))
|
| 249 |
+
.sort((a, b) => b.score - a.score);
|
| 250 |
+
// Update dependencies
|
| 251 |
+
}, [
|
| 252 |
+
selectedTask,
|
| 253 |
+
selectedMetricDisplayKey,
|
| 254 |
+
selectedModels,
|
| 255 |
+
taskLevelPerformance,
|
| 256 |
+
models,
|
| 257 |
+
metricsData,
|
| 258 |
+
tasks,
|
| 259 |
+
]);
|
| 260 |
+
|
| 261 |
+
// Task definitions
|
| 262 |
+
const featuredTasks = useMemo(
|
| 263 |
+
() => [
|
| 264 |
+
/* ... (keep task definitions array) ... */ {
|
| 265 |
+
id: "Generating a Creative Idea",
|
| 266 |
+
title: "Generating Creative Ideas",
|
| 267 |
+
description: "Brainstorming unique birthday gift ideas.",
|
| 268 |
+
icon: (color) => (
|
| 269 |
+
<svg
|
| 270 |
+
style={{ color: color || "#6b7280" }}
|
| 271 |
+
className="h-8 w-8"
|
| 272 |
+
fill="none"
|
| 273 |
+
viewBox="0 0 24 24"
|
| 274 |
+
stroke="currentColor"
|
| 275 |
+
>
|
| 276 |
+
<path
|
| 277 |
+
strokeLinecap="round"
|
| 278 |
+
strokeLinejoin="round"
|
| 279 |
+
strokeWidth={2}
|
| 280 |
+
d="M9.663 17h4.673M12 3v1m6.364 1.636l-.707.707M21 12h-1M4 12H3m3.343-5.657l-.707-.707m2.828 9.9a5 5 0 117.072 0l-.548.547A3.374 3.374 0 0014 18.469V19a2 2 0 11-4 0v-.531c0-.895-.356-1.754-.988-2.386l-.548-.547z"
|
| 281 |
+
/>
|
| 282 |
+
</svg>
|
| 283 |
+
),
|
| 284 |
+
},
|
| 285 |
+
{
|
| 286 |
+
id: "Creating a Travel Itinerary",
|
| 287 |
+
title: "Creating Travel Itinerary",
|
| 288 |
+
description: "Planning a European city break.",
|
| 289 |
+
icon: (color) => (
|
| 290 |
+
<svg
|
| 291 |
+
style={{ color: color || "#6b7280" }}
|
| 292 |
+
className="h-8 w-8"
|
| 293 |
+
fill="none"
|
| 294 |
+
viewBox="0 0 24 24"
|
| 295 |
+
stroke="currentColor"
|
| 296 |
+
>
|
| 297 |
+
<path
|
| 298 |
+
strokeLinecap="round"
|
| 299 |
+
strokeLinejoin="round"
|
| 300 |
+
strokeWidth={2}
|
| 301 |
+
d="M17.657 16.657L13.414 20.9a1.998 1.998 0 01-2.827 0l-4.244-4.243a8 8 0 1111.314 0z"
|
| 302 |
+
/>
|
| 303 |
+
<path
|
| 304 |
+
strokeLinecap="round"
|
| 305 |
+
strokeLinejoin="round"
|
| 306 |
+
strokeWidth={2}
|
| 307 |
+
d="M15 11a3 3 0 11-6 0 3 3 0 016 0z"
|
| 308 |
+
/>
|
| 309 |
+
</svg>
|
| 310 |
+
),
|
| 311 |
+
},
|
| 312 |
+
{
|
| 313 |
+
id: "Following Up on a Job Application",
|
| 314 |
+
title: "Following Up on Job App",
|
| 315 |
+
description: "Drafting a professional follow-up email.",
|
| 316 |
+
icon: (color) => (
|
| 317 |
+
<svg
|
| 318 |
+
style={{ color: color || "#6b7280" }}
|
| 319 |
+
className="h-8 w-8"
|
| 320 |
+
fill="none"
|
| 321 |
+
viewBox="0 0 24 24"
|
| 322 |
+
stroke="currentColor"
|
| 323 |
+
>
|
| 324 |
+
<path
|
| 325 |
+
strokeLinecap="round"
|
| 326 |
+
strokeLinejoin="round"
|
| 327 |
+
strokeWidth={2}
|
| 328 |
+
d="M3 8l7.89 5.26a2 2 0 002.22 0L21 8M5 19h14a2 2 0 002-2V7a2 2 0 00-2-2H5a2 2 0 00-2 2v10a2 2 0 002 2z"
|
| 329 |
+
/>
|
| 330 |
+
</svg>
|
| 331 |
+
),
|
| 332 |
+
},
|
| 333 |
+
{
|
| 334 |
+
id: "Planning Your Weekly Meals",
|
| 335 |
+
title: "Planning Weekly Meals",
|
| 336 |
+
description: "Creating a meal plan accommodating dietary restrictions.",
|
| 337 |
+
icon: (color) => (
|
| 338 |
+
<svg
|
| 339 |
+
style={{ color: color || "#6b7280" }}
|
| 340 |
+
className="h-8 w-8"
|
| 341 |
+
fill="none"
|
| 342 |
+
viewBox="0 0 24 24"
|
| 343 |
+
stroke="currentColor"
|
| 344 |
+
>
|
| 345 |
+
<path
|
| 346 |
+
strokeLinecap="round"
|
| 347 |
+
strokeLinejoin="round"
|
| 348 |
+
strokeWidth={2}
|
| 349 |
+
d="M9 5H7a2 2 0 00-2 2v12a2 2 0 002 2h10a2 2 0 002-2V7a2 2 0 00-2-2h-2M9 5a2 2 0 002 2h2a2 2 0 002-2M9 5a2 2 0 012-2h2a2 2 0 012 2"
|
| 350 |
+
/>
|
| 351 |
+
</svg>
|
| 352 |
+
),
|
| 353 |
+
},
|
| 354 |
+
{
|
| 355 |
+
id: "Making a Decision Between Options",
|
| 356 |
+
title: "Making a Decision",
|
| 357 |
+
description: "Comparing tech products for purchase.",
|
| 358 |
+
icon: (color) => (
|
| 359 |
+
<svg
|
| 360 |
+
style={{ color: color || "#6b7280" }}
|
| 361 |
+
className="h-8 w-8"
|
| 362 |
+
fill="none"
|
| 363 |
+
viewBox="0 0 24 24"
|
| 364 |
+
stroke="currentColor"
|
| 365 |
+
strokeWidth={2}
|
| 366 |
+
>
|
| 367 |
+
<path
|
| 368 |
+
strokeLinecap="round"
|
| 369 |
+
strokeLinejoin="round"
|
| 370 |
+
d="M14 5l7 7m0 0l-7 7m7-7H3"
|
| 371 |
+
/>{" "}
|
| 372 |
+
<path
|
| 373 |
+
strokeLinecap="round"
|
| 374 |
+
strokeLinejoin="round"
|
| 375 |
+
d="M10 19l-7-7m0 0l7-7m-7 7h17"
|
| 376 |
+
/>
|
| 377 |
+
</svg>
|
| 378 |
+
),
|
| 379 |
+
},
|
| 380 |
+
{
|
| 381 |
+
id: "Understanding a Complex Topic",
|
| 382 |
+
title: "Understanding a Complex Topic",
|
| 383 |
+
description: "Learning about day trading concepts.",
|
| 384 |
+
icon: (color) => (
|
| 385 |
+
<svg
|
| 386 |
+
style={{ color: color || "#6b7280" }}
|
| 387 |
+
className="h-8 w-8"
|
| 388 |
+
fill="none"
|
| 389 |
+
viewBox="0 0 24 24"
|
| 390 |
+
stroke="currentColor"
|
| 391 |
+
>
|
| 392 |
+
<path
|
| 393 |
+
strokeLinecap="round"
|
| 394 |
+
strokeLinejoin="round"
|
| 395 |
+
strokeWidth={2}
|
| 396 |
+
d="M12 6.253v13m0-13C10.832 5.477 9.246 5 7.5 5S4.168 5.477 3 6.253v13C4.168 18.477 5.754 18 7.5 18s3.332.477 4.5 1.253m0-13C13.168 5.477 14.754 5 16.5 5c1.747 0 3.332.477 4.5 1.253v13C19.832 18.477 18.247 18 16.5 18c-1.746 0-3.332.477-4.5 1.253"
|
| 397 |
+
/>
|
| 398 |
+
</svg>
|
| 399 |
+
),
|
| 400 |
+
},
|
| 401 |
+
],
|
| 402 |
+
[]
|
| 403 |
+
);
|
| 404 |
+
const tasksToDisplay = useMemo(() => {
|
| 405 |
+
const availableTaskKeys = bestModelPerTask
|
| 406 |
+
? Object.keys(bestModelPerTask)
|
| 407 |
+
: [];
|
| 408 |
+
return featuredTasks.filter((ft) => availableTaskKeys.includes(ft.id));
|
| 409 |
+
}, [bestModelPerTask, featuredTasks]);
|
| 410 |
+
const taskRankings = useMemo(() => {
|
| 411 |
+
const rankings = {};
|
| 412 |
+
tasksToDisplay.forEach((task) => {
|
| 413 |
+
const taskId = task.id;
|
| 414 |
+
if (!taskLevelPerformance[taskId]) {
|
| 415 |
+
rankings[taskId] = [];
|
| 416 |
+
return;
|
| 417 |
+
}
|
| 418 |
+
const taskScores = models
|
| 419 |
+
.map((modelMeta) => {
|
| 420 |
+
const modelData = taskLevelPerformance[taskId][modelMeta.model];
|
| 421 |
+
if (!modelData) return null;
|
| 422 |
+
const scores = Object.values(modelData)
|
| 423 |
+
.map((s) => parseFloat(s))
|
| 424 |
+
.filter((s) => !isNaN(s));
|
| 425 |
+
if (scores.length === 0) return null;
|
| 426 |
+
const avgScore =
|
| 427 |
+
scores.reduce((sum, score) => sum + score, 0) / scores.length;
|
| 428 |
+
return {
|
| 429 |
+
model: modelMeta.model,
|
| 430 |
+
taskAvgScore: avgScore,
|
| 431 |
+
color: modelMeta.color || "#999999",
|
| 432 |
+
};
|
| 433 |
+
})
|
| 434 |
+
.filter((item) => item !== null)
|
| 435 |
+
.sort((a, b) => b.taskAvgScore - a.taskAvgScore);
|
| 436 |
+
rankings[taskId] = taskScores;
|
| 437 |
+
});
|
| 438 |
+
return rankings;
|
| 439 |
+
}, [tasksToDisplay, taskLevelPerformance, models]);
|
| 440 |
+
|
| 441 |
+
const renderTopPerformersTab = () => (
|
| 442 |
+
<div className="mb-6">
|
| 443 |
+
<div className="grid grid-cols-1 md:grid-cols-2 lg:grid-cols-3 gap-6">
|
| 444 |
+
{tasksToDisplay.length === 0 && (
|
| 445 |
+
<p className="col-span-full text-center text-gray-500 py-8">
|
| 446 |
+
No task performance data available.
|
| 447 |
+
</p>
|
| 448 |
+
)}
|
| 449 |
+
{tasksToDisplay.map((task) => {
|
| 450 |
+
const bestModelInfo = bestModelPerTask?.[task.id];
|
| 451 |
+
const topModelsForTask = taskRankings[task.id] || [];
|
| 452 |
+
if (!bestModelInfo || bestModelInfo.model === "N/A") return null;
|
| 453 |
+
const modelColor = bestModelInfo.color || "#6b7280";
|
| 454 |
+
return (
|
| 455 |
+
<div
|
| 456 |
+
key={task.id}
|
| 457 |
+
className="border rounded-lg overflow-hidden shadow-sm bg-white flex flex-col"
|
| 458 |
+
>
|
| 459 |
+
<div className="px-4 py-2 bg-gray-50 border-b flex items-center flex-shrink-0">
|
| 460 |
+
<h3
|
| 461 |
+
className="font-semibold text-sm flex-grow truncate pr-2"
|
| 462 |
+
title={task.title}
|
| 463 |
+
>
|
| 464 |
+
{task.title}
|
| 465 |
+
</h3>
|
| 466 |
+
<div
|
| 467 |
+
className="ml-1 w-2 h-2 rounded-full flex-shrink-0"
|
| 468 |
+
style={{ backgroundColor: modelColor }}
|
| 469 |
+
aria-hidden="true"
|
| 470 |
+
></div>
|
| 471 |
+
</div>
|
| 472 |
+
<div className="p-4 flex-grow flex flex-col">
|
| 473 |
+
<div className="flex items-center mb-4 flex-shrink-0">
|
| 474 |
+
<div
|
| 475 |
+
className="p-2 rounded-full flex-shrink-0"
|
| 476 |
+
style={{ backgroundColor: `${modelColor}20` }}
|
| 477 |
+
>
|
| 478 |
+
{task.icon(modelColor)}
|
| 479 |
+
</div>
|
| 480 |
+
<div className="ml-4 overflow-hidden">
|
| 481 |
+
<h4
|
| 482 |
+
className="text-lg font-semibold truncate"
|
| 483 |
+
title={bestModelInfo.model}
|
| 484 |
+
>
|
| 485 |
+
{bestModelInfo.model}
|
| 486 |
+
</h4>
|
| 487 |
+
<p className="text-sm text-gray-600">
|
| 488 |
+
Avg. Score: {bestModelInfo.score?.toFixed(1) ?? "N/A"}
|
| 489 |
+
</p>
|
| 490 |
+
</div>
|
| 491 |
+
</div>
|
| 492 |
+
<div className="mb-4 flex-grow">
|
| 493 |
+
<h5 className="text-sm font-semibold mb-2">Task Ranking</h5>
|
| 494 |
+
{topModelsForTask.length > 0 ? (
|
| 495 |
+
<ol className="space-y-1.5 list-none pl-0">
|
| 496 |
+
{topModelsForTask.map((rankedModel, index) => (
|
| 497 |
+
<li
|
| 498 |
+
key={rankedModel.model}
|
| 499 |
+
className="text-sm flex items-center justify-between"
|
| 500 |
+
>
|
| 501 |
+
<div className="flex items-center truncate mr-2">
|
| 502 |
+
<span className="font-medium w-4 mr-1.5 text-gray-500">
|
| 503 |
+
{index + 1}.
|
| 504 |
+
</span>
|
| 505 |
+
<div
|
| 506 |
+
className="w-2.5 h-2.5 rounded-full mr-1.5 flex-shrink-0"
|
| 507 |
+
style={{ backgroundColor: rankedModel.color }}
|
| 508 |
+
></div>
|
| 509 |
+
<span
|
| 510 |
+
className="truncate"
|
| 511 |
+
title={rankedModel.model}
|
| 512 |
+
>
|
| 513 |
+
{rankedModel.model}
|
| 514 |
+
</span>
|
| 515 |
+
</div>
|
| 516 |
+
<span
|
| 517 |
+
className={`font-medium flex-shrink-0 px-1.5 py-0.5 text-xs rounded ${getScoreBadgeColor(
|
| 518 |
+
rankedModel.taskAvgScore
|
| 519 |
+
)}`}
|
| 520 |
+
>
|
| 521 |
+
{rankedModel.taskAvgScore?.toFixed(1) ?? "N/A"}
|
| 522 |
+
</span>
|
| 523 |
+
</li>
|
| 524 |
+
))}
|
| 525 |
+
</ol>
|
| 526 |
+
) : (
|
| 527 |
+
<p className="text-xs text-gray-500 italic">
|
| 528 |
+
Ranking data not available.
|
| 529 |
+
</p>
|
| 530 |
+
)}
|
| 531 |
+
</div>
|
| 532 |
+
<p className="text-xs text-gray-600 mt-auto pt-2 flex-shrink-0">
|
| 533 |
+
Task Example: {task.description}
|
| 534 |
+
</p>
|
| 535 |
+
</div>
|
| 536 |
+
</div>
|
| 537 |
+
);
|
| 538 |
+
})}
|
| 539 |
+
</div>
|
| 540 |
+
</div>
|
| 541 |
+
);
|
| 542 |
+
|
| 543 |
+
// Render the model performance analysis tab - *** UPDATED SELECTOR & LABELS ***
|
| 544 |
+
const renderModelPerformanceTab = () => (
|
| 545 |
+
<div>
|
| 546 |
+
{/* Controls Panel */}
|
| 547 |
+
<div className="border rounded-lg overflow-hidden mb-6 shadow-sm">
|
| 548 |
+
<div className="px-4 py-3 bg-gray-50 border-b">
|
| 549 |
+
<h3 className="font-semibold text-gray-800">
|
| 550 |
+
Task Analysis Controls
|
| 551 |
+
</h3>
|
| 552 |
+
</div>
|
| 553 |
+
<div className="p-4 flex flex-wrap items-center gap-4">
|
| 554 |
+
{/* Task Selector */}
|
| 555 |
+
<div className="w-full sm:w-auto">
|
| 556 |
+
<label
|
| 557 |
+
htmlFor="taskSelect"
|
| 558 |
+
className="block text-sm font-medium text-gray-700 mb-1"
|
| 559 |
+
>
|
| 560 |
+
Task
|
| 561 |
+
</label>
|
| 562 |
+
<select
|
| 563 |
+
id="taskSelect"
|
| 564 |
+
className="w-full sm:w-64 border rounded-md px-3 py-2 bg-white shadow-sm focus:outline-none focus:ring-2 focus:ring-blue-500"
|
| 565 |
+
value={selectedTask}
|
| 566 |
+
onChange={(e) => setSelectedTask(e.target.value)}
|
| 567 |
+
>
|
| 568 |
+
<option value="all">All Tasks (Average)</option>
|
| 569 |
+
{tasks.sort().map((task) => (
|
| 570 |
+
<option key={task} value={task}>
|
| 571 |
+
{task}
|
| 572 |
+
</option>
|
| 573 |
+
))}
|
| 574 |
+
</select>
|
| 575 |
+
</div>
|
| 576 |
+
{/* Metric Type Selector Pills */}
|
| 577 |
+
<div className="flex flex-col">
|
| 578 |
+
<label className="block text-sm font-medium text-gray-700 mb-1">
|
| 579 |
+
Metric Type
|
| 580 |
+
</label>
|
| 581 |
+
<div className="flex space-x-1 p-1 bg-gray-200 rounded-lg">
|
| 582 |
+
<TabButton
|
| 583 |
+
active={selectedMetricType === "high"}
|
| 584 |
+
onClick={() => setSelectedMetricType("high")}
|
| 585 |
+
>
|
| 586 |
+
High-Level
|
| 587 |
+
</TabButton>
|
| 588 |
+
<TabButton
|
| 589 |
+
active={selectedMetricType === "low"}
|
| 590 |
+
onClick={() => setSelectedMetricType("low")}
|
| 591 |
+
>
|
| 592 |
+
Low-Level
|
| 593 |
+
</TabButton>
|
| 594 |
+
</div>
|
| 595 |
+
</div>
|
| 596 |
+
{/* Metric Selector - VALUE is Title Case key, displays Title Case */}
|
| 597 |
+
<div className="w-full sm:w-auto">
|
| 598 |
+
<label
|
| 599 |
+
htmlFor="metricSelect"
|
| 600 |
+
className="block text-sm font-medium text-gray-700 mb-1"
|
| 601 |
+
>
|
| 602 |
+
{selectedMetricType === "high"
|
| 603 |
+
? "High-Level Metric"
|
| 604 |
+
: "Low-Level Metric"}
|
| 605 |
+
</label>
|
| 606 |
+
<select
|
| 607 |
+
id="metricSelect"
|
| 608 |
+
className="w-full sm:w-48 border rounded-md px-3 py-2 bg-white shadow-sm focus:outline-none focus:ring-2 focus:ring-blue-500"
|
| 609 |
+
value={selectedMetricDisplayKey} // VALUE is the Title Case key
|
| 610 |
+
onChange={(e) => setSelectedMetricDisplayKey(e.target.value)} // Store Title Case key
|
| 611 |
+
disabled={currentMetricDisplayKeysList.length === 0}
|
| 612 |
+
>
|
| 613 |
+
{currentMetricDisplayKeysList.length === 0 && (
|
| 614 |
+
<option value="">No metrics</option>
|
| 615 |
+
)}
|
| 616 |
+
{/* Iterate through Title Case keys, display Title Case */}
|
| 617 |
+
{currentMetricDisplayKeysList.map((displayKey) => (
|
| 618 |
+
<option key={displayKey} value={displayKey}>
|
| 619 |
+
{displayKey}
|
| 620 |
+
</option>
|
| 621 |
+
))}
|
| 622 |
+
</select>
|
| 623 |
+
</div>
|
| 624 |
+
</div>
|
| 625 |
+
</div>
|
| 626 |
+
|
| 627 |
+
{/* Chart Visualization */}
|
| 628 |
+
<div className="border rounded-lg overflow-hidden mb-6 shadow-sm">
|
| 629 |
+
{/* Use selectedMetricDisplayKey for title */}
|
| 630 |
+
<div className="px-4 py-3 bg-gray-50 border-b">
|
| 631 |
+
<h3 className="font-semibold text-gray-800">
|
| 632 |
+
{`${selectedMetricDisplayKey || "Selected Metric"} Comparison for `}
|
| 633 |
+
<span className="font-normal">
|
| 634 |
+
{selectedTask === "all"
|
| 635 |
+
? "All Tasks (Average)"
|
| 636 |
+
: `"${selectedTask}"`}
|
| 637 |
+
</span>
|
| 638 |
+
</h3>
|
| 639 |
+
</div>
|
| 640 |
+
<div className="p-4">
|
| 641 |
+
{chartData.length > 0 ? (
|
| 642 |
+
<div className="h-80">
|
| 643 |
+
<ResponsiveContainer width="100%" height="100%">
|
| 644 |
+
<BarChart
|
| 645 |
+
data={chartData}
|
| 646 |
+
margin={{ top: 5, right: 5, left: 0, bottom: 5 }}
|
| 647 |
+
barCategoryGap="20%"
|
| 648 |
+
>
|
| 649 |
+
<CartesianGrid strokeDasharray="3 3" vertical={false} />
|
| 650 |
+
<XAxis dataKey="model" hide />
|
| 651 |
+
<YAxis domain={[0, 100]} width={30} tick={{ fontSize: 11 }} />
|
| 652 |
+
<RechartsTooltip
|
| 653 |
+
content={<CustomTooltip />}
|
| 654 |
+
wrapperStyle={{ zIndex: 10 }}
|
| 655 |
+
/>
|
| 656 |
+
{/* Use Title Case key for Bar name */}
|
| 657 |
+
<Bar
|
| 658 |
+
dataKey="score"
|
| 659 |
+
name={selectedMetricDisplayKey || "Score"}
|
| 660 |
+
radius={[4, 4, 0, 0]}
|
| 661 |
+
>
|
| 662 |
+
{chartData.map((entry, index) => (
|
| 663 |
+
<Cell key={`cell-${index}`} fill={entry.color} />
|
| 664 |
+
))}
|
| 665 |
+
</Bar>
|
| 666 |
+
</BarChart>
|
| 667 |
+
</ResponsiveContainer>
|
| 668 |
+
<div className="flex flex-wrap justify-center gap-x-4 gap-y-1 mt-4 text-xs">
|
| 669 |
+
{chartData.map((entry) => (
|
| 670 |
+
<div key={entry.model} className="flex items-center">
|
| 671 |
+
<div
|
| 672 |
+
className="w-2.5 h-2.5 rounded-full mr-1.5"
|
| 673 |
+
style={{ backgroundColor: entry.color }}
|
| 674 |
+
></div>
|
| 675 |
+
<span>{entry.model}</span>
|
| 676 |
+
</div>
|
| 677 |
+
))}
|
| 678 |
+
</div>
|
| 679 |
+
</div>
|
| 680 |
+
) : (
|
| 681 |
+
<div className="flex items-center justify-center h-60 bg-gray-50 rounded">
|
| 682 |
+
<div className="text-center p-4">
|
| 683 |
+
<svg
|
| 684 |
+
xmlns="http://www.w3.org/2000/svg"
|
| 685 |
+
className="h-10 w-10 mx-auto text-gray-400 mb-3"
|
| 686 |
+
fill="none"
|
| 687 |
+
viewBox="0 0 24 24"
|
| 688 |
+
stroke="currentColor"
|
| 689 |
+
>
|
| 690 |
+
<path
|
| 691 |
+
strokeLinecap="round"
|
| 692 |
+
strokeLinejoin="round"
|
| 693 |
+
strokeWidth={2}
|
| 694 |
+
d="M9 17v-2m3 2v-4m3 4v-6m2 10H7a2 2 0 01-2-2V7a2 2 0 012-2h2l2-3h6l2 3h2a2 2 0 012 2v10a2 2 0 01-2 2h-1"
|
| 695 |
+
/>
|
| 696 |
+
</svg>
|
| 697 |
+
<h3 className="text-lg font-medium text-gray-900 mb-1">
|
| 698 |
+
No Data Available
|
| 699 |
+
</h3>
|
| 700 |
+
<p className="text-sm text-gray-600">
|
| 701 |
+
No data available for the selected task, metric, and models.
|
| 702 |
+
</p>
|
| 703 |
+
</div>
|
| 704 |
+
</div>
|
| 705 |
+
)}
|
| 706 |
+
<div className="mt-15 text-xs text-gray-500">
|
| 707 |
+
{/* Corrected margin-top */}
|
| 708 |
+
{/* Use Title Case key for display and lookup */}
|
| 709 |
+
<p>
|
| 710 |
+
This chart shows{" "}
|
| 711 |
+
<strong>
|
| 712 |
+
{selectedMetricDisplayKey || "the selected metric"}
|
| 713 |
+
</strong>{" "}
|
| 714 |
+
scores (0-100, higher is better) for models on
|
| 715 |
+
{selectedTask === "all"
|
| 716 |
+
? "average across all tasks"
|
| 717 |
+
: `the "${selectedTask}" task`}
|
| 718 |
+
.
|
| 719 |
+
{selectedMetricDisplayKey &&
|
| 720 |
+
` Metric definition: ${getMetricTooltip(
|
| 721 |
+
selectedMetricDisplayKey
|
| 722 |
+
)}`}
|
| 723 |
+
</p>
|
| 724 |
+
</div>
|
| 725 |
+
</div>
|
| 726 |
+
</div>
|
| 727 |
+
</div>
|
| 728 |
+
);
|
| 729 |
+
|
| 730 |
+
// Main return with tabs
|
| 731 |
+
return (
|
| 732 |
+
<div>
|
| 733 |
+
<div className="mb-6 flex flex-col md:flex-row justify-between items-center gap-4">
|
| 734 |
+
<div className="flex space-x-1 p-1 bg-gray-200 rounded-lg">
|
| 735 |
+
<TabButton
|
| 736 |
+
active={activeTab === "top-performers"}
|
| 737 |
+
onClick={() => setActiveTab("top-performers")}
|
| 738 |
+
>
|
| 739 |
+
Top Performing Models by Task
|
| 740 |
+
</TabButton>{" "}
|
| 741 |
+
<TabButton
|
| 742 |
+
active={activeTab === "model-performance"}
|
| 743 |
+
onClick={() => setActiveTab("model-performance")}
|
| 744 |
+
>
|
| 745 |
+
Model Performance Comparison
|
| 746 |
+
</TabButton>{" "}
|
| 747 |
+
</div>{" "}
|
| 748 |
+
</div>
|
| 749 |
+
{activeTab === "top-performers"
|
| 750 |
+
? renderTopPerformersTab()
|
| 751 |
+
: renderModelPerformanceTab()}
|
| 752 |
+
</div>
|
| 753 |
+
);
|
| 754 |
+
};
|
| 755 |
+
|
| 756 |
+
export default TaskPerformance;
|
leaderboard-app/components/Tooltip.jsx
ADDED
|
@@ -0,0 +1,145 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"use client";
|
| 2 |
+
|
| 3 |
+
import React, { useState, useRef, useEffect } from "react";
|
| 4 |
+
|
| 5 |
+
export const Tooltip = ({
|
| 6 |
+
content,
|
| 7 |
+
children,
|
| 8 |
+
position = "top",
|
| 9 |
+
showIcon = true,
|
| 10 |
+
iconClassName = "",
|
| 11 |
+
}) => {
|
| 12 |
+
const [isVisible, setIsVisible] = useState(false);
|
| 13 |
+
const [tooltipStyle, setTooltipStyle] = useState({});
|
| 14 |
+
const tooltipRef = useRef(null);
|
| 15 |
+
const iconRef = useRef(null);
|
| 16 |
+
|
| 17 |
+
const showTooltip = () => setIsVisible(true);
|
| 18 |
+
const hideTooltip = () => setIsVisible(false);
|
| 19 |
+
|
| 20 |
+
// Position the tooltip when it becomes visible
|
| 21 |
+
useEffect(() => {
|
| 22 |
+
if (isVisible && iconRef.current && tooltipRef.current) {
|
| 23 |
+
const triggerRect = iconRef.current.getBoundingClientRect();
|
| 24 |
+
const tooltipRect = tooltipRef.current.getBoundingClientRect();
|
| 25 |
+
const spacing = 8; // Space between trigger and tooltip
|
| 26 |
+
|
| 27 |
+
let style = {};
|
| 28 |
+
|
| 29 |
+
switch (position) {
|
| 30 |
+
case "top":
|
| 31 |
+
style = {
|
| 32 |
+
left:
|
| 33 |
+
triggerRect.left + triggerRect.width / 2 - tooltipRect.width / 2,
|
| 34 |
+
top: triggerRect.top - tooltipRect.height - spacing,
|
| 35 |
+
};
|
| 36 |
+
break;
|
| 37 |
+
case "bottom":
|
| 38 |
+
style = {
|
| 39 |
+
left:
|
| 40 |
+
triggerRect.left + triggerRect.width / 2 - tooltipRect.width / 2,
|
| 41 |
+
top: triggerRect.bottom + spacing,
|
| 42 |
+
};
|
| 43 |
+
break;
|
| 44 |
+
case "left":
|
| 45 |
+
style = {
|
| 46 |
+
left: triggerRect.left - tooltipRect.width - spacing,
|
| 47 |
+
top:
|
| 48 |
+
triggerRect.top + triggerRect.height / 2 - tooltipRect.height / 2,
|
| 49 |
+
};
|
| 50 |
+
break;
|
| 51 |
+
case "right":
|
| 52 |
+
style = {
|
| 53 |
+
left: triggerRect.right + spacing,
|
| 54 |
+
top:
|
| 55 |
+
triggerRect.top + triggerRect.height / 2 - tooltipRect.height / 2,
|
| 56 |
+
};
|
| 57 |
+
break;
|
| 58 |
+
}
|
| 59 |
+
|
| 60 |
+
// Adjust if tooltip would go off-screen
|
| 61 |
+
const viewportWidth = window.innerWidth;
|
| 62 |
+
const viewportHeight = window.innerHeight;
|
| 63 |
+
|
| 64 |
+
if (style.left < 10) style.left = 10;
|
| 65 |
+
if (style.left + tooltipRect.width > viewportWidth - 10) {
|
| 66 |
+
style.left = viewportWidth - tooltipRect.width - 10;
|
| 67 |
+
}
|
| 68 |
+
|
| 69 |
+
if (style.top < 10) style.top = 10;
|
| 70 |
+
if (style.top + tooltipRect.height > viewportHeight - 10) {
|
| 71 |
+
style.top = viewportHeight - tooltipRect.height - 10;
|
| 72 |
+
}
|
| 73 |
+
|
| 74 |
+
// Convert to fixed position
|
| 75 |
+
style.position = "fixed";
|
| 76 |
+
style.left = `${style.left}px`;
|
| 77 |
+
style.top = `${style.top}px`;
|
| 78 |
+
|
| 79 |
+
setTooltipStyle(style);
|
| 80 |
+
}
|
| 81 |
+
}, [isVisible, position]);
|
| 82 |
+
|
| 83 |
+
return (
|
| 84 |
+
<div className="inline-flex items-center relative">
|
| 85 |
+
{children}
|
| 86 |
+
|
| 87 |
+
{showIcon && (
|
| 88 |
+
<div
|
| 89 |
+
ref={iconRef}
|
| 90 |
+
className={`inline-flex items-center justify-center ml-1 cursor-help ${iconClassName}`}
|
| 91 |
+
onMouseEnter={showTooltip}
|
| 92 |
+
onMouseLeave={hideTooltip}
|
| 93 |
+
>
|
| 94 |
+
<svg
|
| 95 |
+
xmlns="http://www.w3.org/2000/svg"
|
| 96 |
+
className="h-4 w-4 text-gray-400 hover:text-gray-500"
|
| 97 |
+
fill="none"
|
| 98 |
+
viewBox="0 0 24 24"
|
| 99 |
+
stroke="currentColor"
|
| 100 |
+
>
|
| 101 |
+
<path
|
| 102 |
+
strokeLinecap="round"
|
| 103 |
+
strokeLinejoin="round"
|
| 104 |
+
strokeWidth={2}
|
| 105 |
+
d="M13 16h-1v-4h-1m1-4h.01M21 12a9 9 0 11-18 0 9 9 0 0118 0z"
|
| 106 |
+
/>
|
| 107 |
+
</svg>
|
| 108 |
+
</div>
|
| 109 |
+
)}
|
| 110 |
+
|
| 111 |
+
{isVisible && (
|
| 112 |
+
<div
|
| 113 |
+
ref={tooltipRef}
|
| 114 |
+
className="z-50 bg-gray-800 text-white text-xs rounded py-1 px-2 max-w-xs shadow-lg pointer-events-none"
|
| 115 |
+
style={{
|
| 116 |
+
...tooltipStyle,
|
| 117 |
+
}}
|
| 118 |
+
>
|
| 119 |
+
{content}
|
| 120 |
+
<div
|
| 121 |
+
className={`absolute w-2 h-2 bg-gray-800 transform rotate-45 ${
|
| 122 |
+
position === "top"
|
| 123 |
+
? "bottom-0 translate-y-1/2"
|
| 124 |
+
: position === "bottom"
|
| 125 |
+
? "top-0 -translate-y-1/2"
|
| 126 |
+
: position === "left"
|
| 127 |
+
? "right-0 translate-x-1/2"
|
| 128 |
+
: "left-0 -translate-x-1/2"
|
| 129 |
+
}`}
|
| 130 |
+
style={{
|
| 131 |
+
left:
|
| 132 |
+
position === "top" || position === "bottom"
|
| 133 |
+
? "calc(50% - 4px)"
|
| 134 |
+
: "",
|
| 135 |
+
top:
|
| 136 |
+
position === "left" || position === "right"
|
| 137 |
+
? "calc(50% - 4px)"
|
| 138 |
+
: "",
|
| 139 |
+
}}
|
| 140 |
+
/>
|
| 141 |
+
</div>
|
| 142 |
+
)}
|
| 143 |
+
</div>
|
| 144 |
+
);
|
| 145 |
+
};
|
leaderboard-app/eslint.config.mjs
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import { dirname } from "path";
|
| 2 |
+
import { fileURLToPath } from "url";
|
| 3 |
+
import { FlatCompat } from "@eslint/eslintrc";
|
| 4 |
+
|
| 5 |
+
// ES modules have no built-in __dirname, so derive this file's directory
// from its module URL.
const __dirname = dirname(fileURLToPath(import.meta.url));

// Compatibility layer used to pull an eslintrc-style preset into the flat
// config array exported below.
const compat = new FlatCompat({ baseDirectory: __dirname });

const eslintConfig = [...compat.extends("next/core-web-vitals")];

export default eslintConfig;
|
leaderboard-app/jsconfig.json
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"compilerOptions": {
|
| 3 |
+
"paths": {
|
| 4 |
+
"@/*": ["./*"]
|
| 5 |
+
}
|
| 6 |
+
}
|
| 7 |
+
}
|
leaderboard-app/lib/utils.js
ADDED
|
@@ -0,0 +1,708 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// lib/utils.js
|
| 2 |
+
|
| 3 |
+
/**
|
| 4 |
+
* Constants
|
| 5 |
+
*/
|
| 6 |
+
// Hex colour assigned to each known model identifier.
// NOTE(review): the values look like the Okabe-Ito colour-blind-safe palette;
// confirm before adding new entries so additions stay consistent.
const MODEL_COLORS = {
  "gpt-4o": "#0072B2", // Strong blue
  "claude-3.7-sonnet": "#D55E00", // Vermillion/orange-red
  "deepseek-r1": "#F0E442", // Yellow
  "o1": "#CC79A7", // Pink
  "gemini-2.0-flash-001": "#009E73", // Bluish green
  "llama-3.1-405b-instruct": "#56B4E9", // Light blue
};
|
| 14 |
+
|
| 15 |
+
// --- Helper Functions ---
|
| 16 |
+
|
| 17 |
+
/**
 * Converts a camelCase (or PascalCase) identifier to Title Case.
 *
 * A space is inserted before every ASCII capital letter, the result is
 * trimmed, and its first character is upper-cased.
 *
 * @param {string} str Input string.
 * @returns {string} Title Case string; falsy input is returned unchanged.
 */
export const camelToTitle = (str) => {
  if (!str) return str;
  // Trim BEFORE capitalising: an input that starts with a capital
  // ("HelloWorld") becomes " Hello World", and upper-casing that leading
  // space would leave it in the result.
  const spaced = str.replace(/([A-Z])/g, " $1").trim();
  return spaced.charAt(0).toUpperCase() + spaced.slice(1);
};
|
| 27 |
+
|
| 28 |
+
/**
 * Formats a snake_case or kebab-case key as a Title Case display label.
 *
 * Underscores and hyphens become spaces, surrounding whitespace is trimmed,
 * and the first character of every word is upper-cased.
 *
 * @param {string} key Metric/factor key, e.g. "user_experience".
 * @returns {string} Display label, or "N/A" when `key` is empty, not a
 *   string, or already "N/A".
 */
export const formatDisplayKey = (key) => {
  const isUsableKey = typeof key === "string" && key !== "" && key !== "N/A";
  if (!isUsableKey) return "N/A";

  // Handle snake_case and kebab-case separators in a single pass.
  const words = key.replace(/[_-]/g, " ").trim();
  return words.replace(/\b\w/g, (firstChar) => firstChar.toUpperCase());
};
|
| 42 |
+
|
| 43 |
+
/**
 * Builds the symbol, CSS class and tooltip text for a significance marker.
 *
 * @param {boolean|*} isSignificant `true` / `false` for a known result; any
 *   other value is reported as undetermined.
 * @param {number} pValue p-value shown in the tooltip (3 decimals); anything
 *   that is not a finite-or-infinite number is shown as "N/A".
 * @param {number} [alpha=0.05] Threshold quoted in the tooltip text.
 * @returns {{symbol: string, className: string, tooltip: string}}
 */
export function getSignificanceIndicator(isSignificant, pValue, alpha = 0.05) {
  // Anything other than an explicit boolean means the result is unknown.
  if (isSignificant !== true && isSignificant !== false) {
    return {
      symbol: "?",
      className: "text-gray-400",
      tooltip: "Significance Undetermined",
    };
  }

  const hasNumericP = typeof pValue === "number" && !isNaN(pValue);
  const pLabel = hasNumericP ? pValue.toFixed(3) : "N/A";

  if (isSignificant) {
    return {
      symbol: "✓",
      className: "text-green-600",
      tooltip: `Statistically Significant (p=${pLabel} < ${alpha})`,
    };
  }

  return {
    symbol: "✗",
    className: "text-red-600",
    tooltip: `Not Statistically Significant (p=${pLabel} ≥ ${alpha})`,
  };
}
|
| 69 |
+
|
| 70 |
+
/**
 * Determines the icon, colour class and tooltip for an equity gap status
 * indicator.
 *
 * Branches are checked in this order; the first match wins:
 *   1. `isConcern === true`                      -> red "▲" (equity concern)
 *   2. `isSignificant` is null or undefined      -> gray "?" (undetermined)
 *   3. large effect and `isSignificant === false` -> yellow "●"
 *   4. `isSignificant === true`                  -> green "✓"
 *   5. anything else                             -> gray "✓" (not significant)
 *
 * @param {boolean} isConcern Whether the gap is flagged as an equity concern.
 * @param {boolean} isLargeEffect Whether the effect size counts as large.
 * @param {boolean|null|undefined} isSignificant Significance test outcome.
 * @param {number} pValue p-value shown in the tooltip (3 decimals), or "N/A"
 *   when it is not a number.
 * @param {string} effectSizeClass Effect size label shown in the tooltip;
 *   falsy values are shown as "N/A".
 * @returns {{icon: string, colorClass: string, tooltip: string}}
 */
export function getEquityIndicatorStyle(
  isConcern,
  isLargeEffect,
  isSignificant,
  pValue,
  effectSizeClass
) {
  const pValueText =
    typeof pValue === "number" && !isNaN(pValue)
      ? `p=${pValue.toFixed(3)}`
      : "p=N/A";
  const effectText = `Effect: ${effectSizeClass || "N/A"}`;

  if (isConcern === true) {
    return {
      icon: "▲",
      colorClass: "text-red-600",
      tooltip: `Equity Concern (${effectText}, Significant, ${pValueText})`,
    };
  }

  // `== null` matches both null and undefined: a missing significance field
  // must read as "undetermined", not fall through to the "not significant"
  // check mark below (this mirrors getSignificanceIndicator's handling).
  if (isSignificant == null) {
    return {
      icon: "?",
      colorClass: "text-gray-500",
      tooltip: `Significance Undetermined (${effectText})`,
    };
  }

  if (isLargeEffect === true && isSignificant === false) {
    return {
      icon: "●",
      colorClass: "text-yellow-600",
      tooltip: `Large Effect but Not Statistically Significant (${pValueText})`,
    };
  }

  if (isSignificant === true) {
    return {
      icon: "✓",
      colorClass: "text-green-600",
      tooltip: `Statistically Significant but Not Large Effect (${effectText}, ${pValueText})`,
    };
  }

  return {
    icon: "✓",
    colorClass: "text-gray-400",
    tooltip: `Not Statistically Significant (${effectText}, ${pValueText})`,
  };
}
|
| 117 |
+
|
| 118 |
+
/**
 * Determines badge styling (background + text classes) from a score's
 * position inside the [min, max] range.
 *
 * The range may be inverted (max < min) for "lower is better" scales; the
 * score is then measured from `min` towards `max` in the same way.
 *
 * @param {*} score - Number or numeric string. Missing, blank, boolean or non-finite values yield the neutral badge.
 * @param {number} [min=0] - Value mapped to 0 %.
 * @param {number} [max=100] - Value mapped to 100 %.
 * @returns {string} Tailwind class string for the badge.
 */
export function getScoreBadgeColor(score, min = 0, max = 100) {
  const NEUTRAL = "bg-gray-100 text-gray-800";

  // Number("") and Number(true) coerce to 0 / 1, so only real numbers and
  // non-blank strings are accepted before converting.
  const isNumber = typeof score === "number";
  const isNonBlankString = typeof score === "string" && score.trim() !== "";
  if (!isNumber && !isNonBlankString) return NEUTRAL;

  const numericScore = Number(score);
  const lower = Number(min);
  const upper = Number(max);
  if (
    !Number.isFinite(numericScore) ||
    !Number.isFinite(lower) ||
    !Number.isFinite(upper) ||
    lower === upper
  ) {
    return NEUTRAL;
  }

  // One formula covers both orientations: for an inverted range both the
  // numerator and the denominator change sign.
  const percent = ((numericScore - lower) / (upper - lower)) * 100;

  if (percent >= 80) return "bg-green-100 text-green-800";
  if (percent >= 50) return "bg-blue-100 text-blue-800";
  if (percent >= 20) return "bg-yellow-100 text-yellow-800";
  return "bg-red-100 text-red-800";
}
|
| 144 |
+
|
| 145 |
+
/**
 * Determines the TEXT colour class for a score on a 0-100 scale (higher is better).
 *
 * @param {*} score - Number or numeric string. Missing, blank, boolean or non-finite values yield the muted gray class.
 * @returns {string} Tailwind text class string.
 */
export function getScoreColor(score) {
  const MISSING = "text-gray-400";

  // Number("") and Number(true) coerce to 0 / 1, so only real numbers and
  // non-blank strings are accepted before converting.
  const isNumber = typeof score === "number";
  const isNonBlankString = typeof score === "string" && score.trim() !== "";
  if (!isNumber && !isNonBlankString) return MISSING;

  const numericScore = Number(score);
  if (!Number.isFinite(numericScore)) return MISSING;

  if (numericScore >= 80) return "text-green-600 font-medium";
  if (numericScore >= 60) return "text-blue-600";
  if (numericScore >= 40) return "text-yellow-600";
  return "text-red-600";
}
|
| 163 |
+
|
| 164 |
+
/**
 * Returns the tooltip text for a metric name or table header.
 *
 * The key is first normalised with `formatDisplayKey` (snake_case/kebab-case
 * to Title Case) and looked up; the untouched key is tried as a fallback so
 * headers that are already display strings (e.g. "Overall SD") still match.
 *
 * @param {*} key - Metric key or header label.
 * @returns {string} The description, or "No description available" when the key is unknown.
 */
export const getMetricTooltip = (key) => {
  const tooltips = {
    // Keys are Title Case, matching dropdowns/headers.
    // High-level categories
    Helpfulness:
      "How well the model provides useful assistance that addresses user needs",
    Communication:
      "Quality of clarity, coherence, and appropriateness of writing style",
    Understanding:
      "How well the model comprehends requests and contextual information",
    Adaptiveness:
      "How well the model adjusts to user needs and feedback during conversation",
    Trustworthiness:
      "Transparency, accuracy, and consistency in model responses",
    Personality:
      "Consistency and definition of the model's persona and ethical alignment",
    "Background And Culture":
      "Cultural sensitivity, relevance, and freedom from bias",
    "Repeat Usage":
      "User satisfaction and willingness to use the model again (score 0-100).",

    // Low-level metrics (formatted names matching display)
    Effectiveness: "How effectively the model helps accomplish specific goals",
    Comprehensiveness:
      "How thoroughly the model addresses all aspects of requests",
    Usefulness: "Practicality and relevance of suggestions or solutions",
    "Tone And Language Style":
      "Appropriateness of tone and language for the context",
    "Conversation Flow": "Natural and conversational quality of responses",
    "Detail And Technical Language":
      "Appropriate level of detail and technical language",
    Accuracy: "Accuracy in interpreting user requests",
    "Context Memory": "Ability to maintain conversation context",
    Intuitiveness: "Ability to pick up on implicit aspects of requests",
    Flexibility: "Adapting responses based on user feedback",
    Clarity: "Ability to clarify ambiguities or misunderstandings",
    "Conversation Building": "Building upon previous exchanges in conversation",
    Consistency: "Consistency of responses across similar questions",
    Confidence: "User confidence in accuracy of information",
    Transparency: "Openness about limitations or uncertainties",
    "Personality Consistency":
      "Consistency of personality throughout interactions",
    "Distinct Personality": "How well-defined the model's personality is",
    "Honesty Empathy Fairness": "Alignment with ethical expectations",
    "Ethical Alignment": "Alignment with user culture, viewpoint, or values",
    "Cultural Awareness":
      "Recognition of when cultural perspective is relevant",
    "Bias And Stereotypes": "Freedom from stereotypes and bias in responses",

    // Table headers
    "Overall Score":
      "Average score across high-level categories (0-100). Higher is better.",
    "Overall SD":
      "Standard Deviation (± points) of scores across high-level categories. Lower indicates more consistent performance across capabilities.",
    "Max Equity Gap":
      "Score difference (points) for the demographic gap with the largest statistical effect size for this model. Status icon indicates Equity Concern (▲) and/or Significance (✓/✗/?). Hover for details.",
    "Max Gap Area":
      "The specific Demographic Factor and Category where the 'Max Equity Gap' (largest effect size gap) occurred for this model.",
    "Equity Concerns (%)":
      "Percentage of evaluated demographic gaps flagged as Equity Concerns (Large Effect & Statistically Significant, p<0.05). Lower is better.",
    "User Retention":
      "Model score for the 'Repeat Usage' category (0-100), indicating likelihood of users using the model again.",
  };

  // Own-property check: a bare `tooltips[name]` would return inherited
  // members (e.g. the Object constructor for "constructor") instead of text.
  const lookup = (name) =>
    typeof name === "string" &&
    Object.prototype.hasOwnProperty.call(tooltips, name)
      ? tooltips[name]
      : undefined;

  // Formatted key first, then the original key as a fallback.
  return (
    lookup(formatDisplayKey(key)) || lookup(key) || "No description available"
  );
};
|
| 236 |
+
|
| 237 |
+
/**
 * Maps an effect-size class label to badge classes (background + text).
 *
 * Labels are matched exactly (case-sensitive); anything unrecognised,
 * missing or "N/A" gets the neutral gray badge.
 *
 * @param {*} effectSizeClass - One of "Negligible", "Small", "Medium", "Large".
 * @returns {string} Tailwind class string for the badge.
 */
export function getEffectSizeBadgeColor(effectSizeClass) {
  const neutralBadge = "bg-gray-100 text-gray-800";
  const badgeByClass = new Map([
    ["Negligible", "bg-green-100 text-green-800"],
    ["Small", "bg-blue-100 text-blue-800"],
    ["Medium", "bg-yellow-100 text-yellow-800"],
    ["Large", "bg-red-100 text-red-800"],
  ]);

  const isMissing = !effectSizeClass || effectSizeClass === "N/A";
  if (isMissing) return neutralBadge;

  const badge = badgeByClass.get(effectSizeClass);
  return badge === undefined ? neutralBadge : badge;
}
|
| 257 |
+
|
| 258 |
+
/**
 * Summarises task-level performance: the best model and its top metrics for
 * every task, and for each of the three task categories
 * (creative / practical / analytical).
 *
 * Only real numeric scores take part in averages and rankings. `null`, blank
 * strings and other non-numeric values are skipped: `Number(null)` and
 * `Number("")` are 0, so coercing them would count a missing score as a zero.
 *
 * @param {Object} rawData - Raw leaderboard data; reads `task_level_performance`
 *   (task name -> model name -> metric key -> score; keys are snake_case).
 * @param {Object} taskCategoryMap - Task name -> category key.
 * @param {string[]} modelOrder - Models to consider, in tie-breaking order (first wins).
 * @returns {{
 *   bestModelPerTask: Object,
 *   keyMetricsByTask: Object,
 *   bestModelPerTaskCategory: Object,
 *   keyMetricsByTaskCategory: Object
 * }} Summary with camelCase keys. Entries without any usable score carry
 *   `model: "N/A"`, `score: "N/A"` and an empty metrics list.
 */
function processTaskPerformance(rawData, taskCategoryMap, modelOrder) {
  const FALLBACK_COLOR = "#999999";
  const models = Array.isArray(modelOrder) ? modelOrder : [];
  const categoryMap =
    taskCategoryMap && typeof taskCategoryMap === "object" ? taskCategoryMap : {};

  // Returns a finite number, or null when the value is not a usable score.
  const toScore = (value) => {
    if (typeof value === "number") return Number.isFinite(value) ? value : null;
    if (typeof value === "string" && value.trim() !== "") {
      const parsed = Number(value);
      return Number.isFinite(parsed) ? parsed : null;
    }
    return null;
  };
  const colorFor = (modelName) => MODEL_COLORS[modelName] || FALLBACK_COLOR;
  const noWinner = (extra = {}) => ({
    model: "N/A",
    score: "N/A",
    color: FALLBACK_COLOR,
    ...extra,
  });

  const result = {
    bestModelPerTask: {},
    keyMetricsByTask: {},
    bestModelPerTaskCategory: {
      creative: null,
      practical: null,
      analytical: null,
    },
    keyMetricsByTaskCategory: { creative: [], practical: [], analytical: [] },
  };

  const taskPerformance = rawData?.task_level_performance;
  if (!taskPerformance || typeof taskPerformance !== "object") {
    console.warn(
      "Task level performance data missing or invalid in processTaskPerformance input."
    );
    return result;
  }

  // ---- Per task: model with the highest mean score across its metrics ----
  for (const [taskName, taskData] of Object.entries(taskPerformance)) {
    if (!taskData) continue;

    let best = null;
    for (const modelName of models) {
      const modelMetrics = taskData[modelName];
      if (!modelMetrics || typeof modelMetrics !== "object") continue;

      const scores = Object.values(modelMetrics)
        .map(toScore)
        .filter((score) => score !== null);
      if (scores.length === 0) continue;

      const avgScore =
        scores.reduce((sum, score) => sum + score, 0) / scores.length;
      // Strict ">" keeps the earlier model in `modelOrder` on a tie.
      if (best === null || avgScore > best.avgScore) {
        best = { modelName, avgScore, modelMetrics };
      }
    }

    if (best === null) {
      result.bestModelPerTask[taskName] = noWinner();
      result.keyMetricsByTask[taskName] = [];
      continue;
    }

    result.bestModelPerTask[taskName] = {
      model: best.modelName,
      score: best.avgScore,
      color: colorFor(best.modelName),
    };
    // Top three metrics of the winning model; the snake_case key is kept
    // next to its display name.
    result.keyMetricsByTask[taskName] = Object.entries(best.modelMetrics)
      .map(([metricKey, rawScore]) => ({ metricKey, score: toScore(rawScore) }))
      .filter((metric) => metric.score !== null)
      .sort((a, b) => b.score - a.score)
      .slice(0, 3)
      .map((metric) => ({
        ...metric,
        metricName: formatDisplayKey(metric.metricKey),
      }));
  }

  // ---- Per category: pool every metric score of the category's tasks ----
  const tasksByCategory = { creative: [], practical: [], analytical: [] };
  for (const [task, category] of Object.entries(categoryMap)) {
    // Own-property check so a category such as "constructor" is ignored
    // instead of resolving to an inherited member.
    const isKnownCategory = Object.prototype.hasOwnProperty.call(
      tasksByCategory,
      category
    );
    if (isKnownCategory && taskPerformance[task]) {
      tasksByCategory[category].push(task);
    }
  }

  for (const [category, tasks] of Object.entries(tasksByCategory)) {
    const categoryName = `${
      category.charAt(0).toUpperCase() + category.slice(1)
    } Tasks`;

    let best = null;
    for (const modelName of models) {
      let totalScore = 0;
      let count = 0;
      const perMetric = new Map(); // metricKey -> { sum, count }

      for (const task of tasks) {
        const modelMetrics = taskPerformance[task]?.[modelName];
        if (!modelMetrics) continue;

        for (const [metricKey, rawScore] of Object.entries(modelMetrics)) {
          const score = toScore(rawScore);
          if (score === null) continue;

          totalScore += score;
          count += 1;
          const aggregate = perMetric.get(metricKey) ?? { sum: 0, count: 0 };
          aggregate.sum += score;
          aggregate.count += 1;
          perMetric.set(metricKey, aggregate);
        }
      }

      if (count === 0) continue;
      const avg = totalScore / count;
      if (best === null || avg > best.avg) {
        best = { modelName, avg, perMetric };
      }
    }

    if (best === null) {
      result.bestModelPerTaskCategory[category] = noWinner({ categoryName });
      result.keyMetricsByTaskCategory[category] = [];
      continue;
    }

    result.bestModelPerTaskCategory[category] = {
      model: best.modelName,
      score: Number(best.avg.toFixed(1)),
      color: colorFor(best.modelName),
      categoryName,
    };
    // Top five metrics (by mean) of the winning model, with display names.
    result.keyMetricsByTaskCategory[category] = [...best.perMetric]
      .map(([metricKey, aggregate]) => ({
        metricKey,
        score: aggregate.sum / aggregate.count,
      }))
      .sort((a, b) => b.score - a.score)
      .slice(0, 5)
      .map((metric) => ({
        metric: formatDisplayKey(metric.metricKey),
        score: metric.score,
        scoreDisplay: metric.score.toFixed(1),
      }));
  }

  return result;
}
|
| 429 |
+
|
| 430 |
+
/**
 * Prepares the data from leaderboard_data.json for visualization.
 *
 * Top-level structures are re-keyed to camelCase; nested raw structures keep
 * their original keys. Per-model equity-concern percentages are derived from
 * `equity_analysis.all_equity_gaps`.
 *
 * Numeric fields are parsed strictly: `null`, `undefined`, blank strings and
 * non-numeric values become `null` (never 0), so a missing score cannot be
 * mistaken for a real zero.
 *
 * @param {Object} rawDataInput - The raw data from leaderboard_data.json. Must
 *   contain a `model_order` array; otherwise an error is logged and an empty
 *   default structure is returned.
 * @returns {Object} Processed data: `models`, `metricsData`, `radarData`,
 *   `bestPerCategory`, `bestPerMetric`, `overviewCardData`, `rawData`,
 *   `metadata`, `equityAnalysis`.
 */
export function prepareDataForVisualization(rawDataInput) {
  const FALLBACK_COLOR = "#999999";

  // ---- Basic validation ----
  if (!Array.isArray(rawDataInput?.model_order)) {
    console.error(
      "prepareDataForVisualization received invalid rawData.",
      rawDataInput
    );
    return {
      models: [],
      metricsData: { highLevelCategories: {}, lowLevelMetrics: {} },
      radarData: [],
      bestPerCategory: {},
      bestPerMetric: {},
      overviewCardData: {},
      rawData: {},
      metadata: {},
      equityAnalysis: {},
    };
  }

  // ---- Small local helpers ----
  const colorFor = (modelName) => MODEL_COLORS[modelName] || FALLBACK_COLOR;
  const asArray = (value) => (Array.isArray(value) ? value : []);
  // Number(null) and Number("") are 0, so those are rejected before converting.
  const toNumberOrNull = (value) => {
    if (typeof value === "number") return Number.isNaN(value) ? null : value;
    if (typeof value === "string" && value.trim() !== "") {
      const parsed = Number(value);
      return Number.isNaN(parsed) ? null : parsed;
    }
    return null;
  };

  // ---- Original references (structure is kept as-is) ----
  const modelOrder = rawDataInput.model_order;
  const equityAnalysis = rawDataInput.equity_analysis || {
    all_equity_gaps: [],
    model_max_effect_gaps: {},
    universal_issues: [],
    assessment_method: {},
    demographic_variation_stats: {},
  };
  const allGaps = asArray(equityAnalysis.all_equity_gaps);
  const metadata = rawDataInput.metadata || {};
  const mrpDemographicsRaw = rawDataInput.mrp_demographics || {};
  const taskLevelPerformanceRaw = rawDataInput.task_level_performance || {};

  // ---- MRP demographics: collect filter options ----
  // Shape walked here: model -> factor -> level -> metric.
  const levelsByFactor = new Map();
  const availableMetrics = new Set();
  if (typeof mrpDemographicsRaw === "object") {
    for (const modelData of Object.values(mrpDemographicsRaw)) {
      for (const [factor, factorData] of Object.entries(modelData || {})) {
        if (!levelsByFactor.has(factor)) levelsByFactor.set(factor, new Set());
        const levels = levelsByFactor.get(factor);
        for (const [level, levelData] of Object.entries(factorData || {})) {
          levels.add(level);
          for (const metric of Object.keys(levelData || {})) {
            availableMetrics.add(metric);
          }
        }
      }
    }
  }
  const demographicOptions = {};
  for (const [factor, levels] of levelsByFactor) {
    demographicOptions[factor] = Array.from(levels).sort();
  }
  const availableMetricsList = Array.from(availableMetrics).sort();

  // ---- Overall ranking -> camelCase, plus equity concern % ----
  // Gaps are tallied once per model instead of re-filtering the whole list
  // for every ranking row.
  const gapStatsByModel = new Map();
  for (const gap of allGaps) {
    if (!gap) continue;
    const stats = gapStatsByModel.get(gap.model) ?? { total: 0, concerns: 0 };
    stats.total += 1;
    if (gap.is_equity_concern === true) stats.concerns += 1;
    gapStatsByModel.set(gap.model, stats);
  }

  const overallRankingProcessed = asArray(rawDataInput.overall_ranking)
    .filter((modelData) => modelData && typeof modelData === "object")
    .map((modelData) => {
      const modelName = modelData.model;
      // Detail keys are snake_case in the input JSON.
      const maxEffectGapDetails = modelData.max_effect_gap_details || {};
      const gapStats = gapStatsByModel.get(modelName);
      // null (not 0 %) when no gaps were evaluated for this model.
      const equityConcernPercentage =
        gapStats && gapStats.total > 0
          ? (gapStats.concerns / gapStats.total) * 100
          : null;

      return {
        rank: modelData.rank,
        model: modelName,
        overallScore: toNumberOrNull(modelData.overall_score),
        highLevelCatScore: toNumberOrNull(modelData.high_level_cat_score),
        lowLevelCatScore: toNumberOrNull(modelData.low_level_cat_score),
        color: colorFor(modelName),
        stdDevAcrossCats: modelData.std_dev_across_cats,
        stdDevAcrossCatsNumeric: toNumberOrNull(modelData.std_dev_across_cats),
        repeatUsageScore: toNumberOrNull(modelData.repeat_usage_score),
        maxEffectCategory: modelData.max_effect_category,
        maxEffectFactor: maxEffectGapDetails.demographic_factor,
        maxEffectSize: toNumberOrNull(maxEffectGapDetails.effect_size),
        maxEffectGap: toNumberOrNull(maxEffectGapDetails.score_range),
        maxEffectConcernFlag: maxEffectGapDetails.is_equity_concern ?? false,
        maxEffectSignificant: maxEffectGapDetails.is_statistically_significant,
        maxEffectPValue: maxEffectGapDetails.p_value,
        maxEffectSizeClass: maxEffectGapDetails.effect_size_class || "N/A",
        maxEffectRawNHeuristic:
          maxEffectGapDetails.raw_n_confidence_heuristic || "N/A",
        maxEffectGapDetails, // original snake_case details, passed through
        equityConcernPercentage,
      };
    });

  // ---- Metrics breakdown: camelCase structure, original metric keys inside ----
  const metricsBreakdownProcessed = {
    highLevelCategories: {},
    lowLevelMetrics: {},
  };
  const metricsBreakdown = rawDataInput.metrics_breakdown;
  if (metricsBreakdown && typeof metricsBreakdown === "object") {
    // Converts one category/metric entry; `displayKey` is only used for the warning.
    const processCategory = (displayKey, categoryData) => {
      if (!categoryData || !categoryData.model_scores) {
        console.warn(`Missing model_scores for category: ${displayKey}`);
        return {
          modelScores: {},
          topPerformer: { model: "N/A", score: null, color: FALLBACK_COLOR },
        };
      }

      const processedModelScores = {};
      for (const modelName of modelOrder) {
        const scores = categoryData.model_scores[modelName];
        processedModelScores[modelName] = {
          nationalScore: scores ? scores.national_score ?? null : null,
          color: colorFor(modelName),
          // Gap info is passed through with its original keys.
          maxEffectGapInfo: (scores && scores.max_effect_gap_info) || {},
        };
      }

      const topPerf = categoryData.top_performer || {};
      return {
        modelScores: processedModelScores,
        topPerformer: {
          model: topPerf.model || "N/A",
          score: toNumberOrNull(topPerf.score), // "N/A" / null / blank -> null
          color: colorFor(topPerf.model),
        },
        internalMetricKey: categoryData._internal_category_name,
      };
    };

    for (const [displayKey, catData] of Object.entries(
      metricsBreakdown.high_level_categories || {}
    )) {
      metricsBreakdownProcessed.highLevelCategories[displayKey] =
        processCategory(displayKey, catData);
    }
    for (const [displayKey, metricData] of Object.entries(
      metricsBreakdown.low_level_metrics || {}
    )) {
      metricsBreakdownProcessed.lowLevelMetrics[displayKey] = processCategory(
        displayKey,
        metricData
      );
    }
  } else {
    console.warn("rawDataInput.metrics_breakdown is missing or not an object.");
  }

  // ---- Radar chart: one row per high-level category, one column per model ----
  const radarChartData = Object.entries(
    metricsBreakdownProcessed.highLevelCategories
  ).map(([displayKey, categoryData]) => {
    const radarEntry = { category: displayKey };
    for (const modelName of modelOrder) {
      // The chart needs a number for every axis, so missing scores plot as 0.
      radarEntry[modelName] =
        Number(categoryData.modelScores[modelName]?.nationalScore) || 0;
    }
    return radarEntry;
  });

  // ---- Top performers ----
  const bestPerCategory = {};
  for (const [displayKey, catData] of Object.entries(
    metricsBreakdownProcessed.highLevelCategories
  )) {
    bestPerCategory[displayKey] = catData.topPerformer;
  }
  const bestPerMetric = {};
  for (const [displayKey, metricData] of Object.entries(
    metricsBreakdownProcessed.lowLevelMetrics
  )) {
    bestPerMetric[displayKey] = metricData.topPerformer;
  }

  // ---- Task performance ----
  const taskCategoryMap = {
    "Generating a Creative Idea": "creative",
    "Creating a Travel Itinerary": "creative",
    "Following Up on a Job Application": "practical",
    "Planning Your Weekly Meals": "practical",
    "Making a Decision Between Options": "analytical",
    "Understanding a Complex Topic": "analytical",
  };
  // The helper reads `task_level_performance` from the raw input itself.
  const taskPerformanceResults = processTaskPerformance(
    rawDataInput,
    taskCategoryMap,
    modelOrder
  );

  const tasks = Object.keys(taskLevelPerformanceRaw || {});
  const taskCategories = {};
  for (const [task, category] of Object.entries(taskCategoryMap)) {
    if (!taskCategories[category]) taskCategories[category] = [];
    if (tasks.includes(task)) taskCategories[category].push(task);
  }

  // Shape walked here: task -> model -> metric key.
  const taskMetrics = new Set();
  for (const taskData of Object.values(taskLevelPerformanceRaw || {})) {
    for (const modelData of Object.values(taskData || {})) {
      for (const metric of Object.keys(modelData || {})) {
        taskMetrics.add(metric);
      }
    }
  }
  const taskMetricsSnakeList = Array.from(taskMetrics).sort();
  // Arrow wrapper keeps the map index/array arguments away from the formatter.
  const taskMetricsDisplayList = Array.from(taskMetrics)
    .map((metric) => formatDisplayKey(metric))
    .sort();

  // ---- Final structure ----
  return {
    models: overallRankingProcessed,
    metricsData: metricsBreakdownProcessed,
    radarData: radarChartData,
    bestPerCategory,
    bestPerMetric,
    overviewCardData: taskPerformanceResults,
    rawData: {
      // Original structures under camelCase keys
      taskLevelPerformance: taskLevelPerformanceRaw,
      mrpDemographics: mrpDemographicsRaw,
      // Processed lists/maps for filtering and display
      demographicOptions,
      availableMetrics: availableMetricsList,
      tasks,
      taskCategories,
      taskMetrics: taskMetricsDisplayList, // display names
      taskMetricsSnake: taskMetricsSnakeList, // original keys for lookup
      taskCategoryMap,
    },
    metadata,
    equityAnalysis,
  };
}
|
leaderboard-app/next.config.mjs
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// Next.js configuration for the leaderboard app; no options are overridden.
/** @type {import('next').NextConfig} */
const nextConfig = {};

export default nextConfig;
|
leaderboard-app/package-lock.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
leaderboard-app/package.json
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"name": "leaderboard-app",
|
| 3 |
+
"version": "0.1.0",
|
| 4 |
+
"private": true,
|
| 5 |
+
"scripts": {
|
| 6 |
+
"dev": "next dev",
|
| 7 |
+
"build": "next build",
|
| 8 |
+
"start": "next start",
|
| 9 |
+
"lint": "next lint"
|
| 10 |
+
},
|
| 11 |
+
"dependencies": {
|
| 12 |
+
"lucide-react": "^0.487.0",
|
| 13 |
+
"next": "15.2.3",
|
| 14 |
+
"react": "^19.0.0",
|
| 15 |
+
"react-dom": "^19.0.0",
|
| 16 |
+
"recharts": "^2.15.1"
|
| 17 |
+
},
|
| 18 |
+
"devDependencies": {
|
| 19 |
+
"@eslint/eslintrc": "^3",
|
| 20 |
+
"@tailwindcss/postcss": "^4",
|
| 21 |
+
"eslint": "^9",
|
| 22 |
+
"eslint-config-next": "15.2.3",
|
| 23 |
+
"tailwindcss": "^4"
|
| 24 |
+
}
|
| 25 |
+
}
|
leaderboard-app/postcss.config.mjs
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// PostCSS configuration: the plugin list holds only the Tailwind CSS PostCSS plugin.
const plugins = ["@tailwindcss/postcss"];

const config = { plugins };

export default config;
|
leaderboard-app/public/file.svg
ADDED
|
|
leaderboard-app/public/globe.svg
ADDED
|
|
leaderboard-app/public/leaderboard_data.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
leaderboard-app/public/next.svg
ADDED
|
|
leaderboard-app/public/vercel.svg
ADDED
|
|
leaderboard-app/public/window.svg
ADDED
|
|