Jimin Huang
committed on
Commit
·
15f2c01
1
Parent(s):
e3acf44
fix: alter column name
Browse files
backend/app/asgi.py
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
"""
|
2 |
-
ASGI entry point for the Open LLM Leaderboard API.
|
3 |
"""
|
4 |
import os
|
5 |
import uvicorn
|
@@ -65,7 +65,7 @@ logger = logging.getLogger("app")
|
|
65 |
|
66 |
# Create FastAPI application
|
67 |
app = FastAPI(
|
68 |
-
title="Open LLM Leaderboard",
|
69 |
version="1.0.0",
|
70 |
docs_url="/docs",
|
71 |
)
|
@@ -103,4 +103,4 @@ async def startup_event():
|
|
103 |
|
104 |
# Setup cache
|
105 |
setup_cache()
|
106 |
-
logger.info(LogFormatter.success("FastAPI Cache initialized with in-memory backend"))
|
|
|
1 |
"""
|
2 |
+
ASGI entry point for the Open Greek Financial LLM Leaderboard API.
|
3 |
"""
|
4 |
import os
|
5 |
import uvicorn
|
|
|
65 |
|
66 |
# Create FastAPI application
|
67 |
app = FastAPI(
|
68 |
+
title="Open Greek Financial LLM Leaderboard",
|
69 |
version="1.0.0",
|
70 |
docs_url="/docs",
|
71 |
)
|
|
|
103 |
|
104 |
# Setup cache
|
105 |
setup_cache()
|
106 |
+
logger.info(LogFormatter.success("FastAPI Cache initialized with in-memory backend"))
|
backend/app/main.py
CHANGED
@@ -6,7 +6,7 @@ import logging
|
|
6 |
setup_logging()
|
7 |
logger = logging.getLogger(__name__)
|
8 |
|
9 |
-
app = FastAPI(title="Open LLM Leaderboard API")
|
10 |
|
11 |
@app.on_event("startup")
|
12 |
async def startup_event():
|
@@ -15,4 +15,4 @@ async def startup_event():
|
|
15 |
# Import and include routers after app initialization
|
16 |
from app.api import models, votes
|
17 |
app.include_router(models.router, prefix="/api", tags=["models"])
|
18 |
-
app.include_router(votes.router, prefix="/api", tags=["votes"])
|
|
|
6 |
setup_logging()
|
7 |
logger = logging.getLogger(__name__)
|
8 |
|
9 |
+
app = FastAPI(title="Open Greek Financial LLM Leaderboard API")
|
10 |
|
11 |
@app.on_event("startup")
|
12 |
async def startup_event():
|
|
|
15 |
# Import and include routers after app initialization
|
16 |
from app.api import models, votes
|
17 |
app.include_router(models.router, prefix="/api", tags=["models"])
|
18 |
+
app.include_router(votes.router, prefix="/api", tags=["votes"])
|
frontend/public/index.html
CHANGED
@@ -9,22 +9,22 @@
|
|
9 |
/>
|
10 |
<meta
|
11 |
name="description"
|
12 |
-
content="Interactive leaderboard
|
13 |
/>
|
14 |
|
15 |
<!-- Open Graph / Facebook -->
|
16 |
<meta property="og:type" content="website" />
|
17 |
<meta
|
18 |
property="og:url"
|
19 |
-
content="https://huggingface.co/spaces/
|
20 |
/>
|
21 |
<meta
|
22 |
property="og:title"
|
23 |
-
content="Open LLM Leaderboard - Compare
|
24 |
/>
|
25 |
<meta
|
26 |
property="og:description"
|
27 |
-
content="Interactive leaderboard for comparing LLM performance across
|
28 |
/>
|
29 |
<meta property="og:image" content="%PUBLIC_URL%/og-image.png" />
|
30 |
|
@@ -32,15 +32,15 @@
|
|
32 |
<meta property="twitter:card" content="summary_large_image" />
|
33 |
<meta
|
34 |
property="twitter:url"
|
35 |
-
content="https://huggingface.co/spaces/
|
36 |
/>
|
37 |
<meta
|
38 |
property="twitter:title"
|
39 |
-
content="Open LLM Leaderboard - Compare
|
40 |
/>
|
41 |
<meta
|
42 |
property="twitter:description"
|
43 |
-
content="Interactive leaderboard for comparing LLM performance across
|
44 |
/>
|
45 |
<meta property="twitter:image" content="%PUBLIC_URL%/og-image.png" />
|
46 |
<!--
|
@@ -53,7 +53,7 @@
|
|
53 |
Learn how to configure a non-root public URL by running `npm run build`.
|
54 |
-->
|
55 |
<title>
|
56 |
-
Open LLM Leaderboard - Compare
|
57 |
</title>
|
58 |
<link
|
59 |
href="https://fonts.googleapis.com/css2?family=Source+Sans+Pro:wght@400;600;700&display=swap"
|
|
|
9 |
/>
|
10 |
<meta
|
11 |
name="description"
|
12 |
+
content="Interactive leaderboard for comparing LLM performance across Greek financial benchmarks."
|
13 |
/>
|
14 |
|
15 |
<!-- Open Graph / Facebook -->
|
16 |
<meta property="og:type" content="website" />
|
17 |
<meta
|
18 |
property="og:url"
|
19 |
+
content="https://huggingface.co/spaces/TheFinAI/open_greek_finance_llm_leaderboard"
|
20 |
/>
|
21 |
<meta
|
22 |
property="og:title"
|
23 |
+
content="Open Greek Financial LLM Leaderboard - Compare Large Language Models in Greek Financial Area"
|
24 |
/>
|
25 |
<meta
|
26 |
property="og:description"
|
27 |
+
content="Interactive leaderboard for comparing LLM performance across Greek financial benchmarks."
|
28 |
/>
|
29 |
<meta property="og:image" content="%PUBLIC_URL%/og-image.png" />
|
30 |
|
|
|
32 |
<meta property="twitter:card" content="summary_large_image" />
|
33 |
<meta
|
34 |
property="twitter:url"
|
35 |
+
content="https://huggingface.co/spaces/TheFinAI/open_greek_finance_llm_leaderboard"
|
36 |
/>
|
37 |
<meta
|
38 |
property="twitter:title"
|
39 |
+
content="Open Greek Financial LLM Leaderboard - Compare Large Language Models in Greek Financial Area"
|
40 |
/>
|
41 |
<meta
|
42 |
property="twitter:description"
|
43 |
+
content="Interactive leaderboard for comparing LLM performance across Greek financial benchmarks."
|
44 |
/>
|
45 |
<meta property="twitter:image" content="%PUBLIC_URL%/og-image.png" />
|
46 |
<!--
|
|
|
53 |
Learn how to configure a non-root public URL by running `npm run build`.
|
54 |
-->
|
55 |
<title>
|
56 |
+
Open Greek Financial LLM Leaderboard - Compare Large Language Models in Greek Financial Area
|
57 |
</title>
|
58 |
<link
|
59 |
href="https://fonts.googleapis.com/css2?family=Source+Sans+Pro:wght@400;600;700&display=swap"
|
frontend/src/components/Footer/Footer.js
CHANGED
@@ -12,15 +12,15 @@ const Footer = () => {
|
|
12 |
}}
|
13 |
>
|
14 |
<Typography variant="body2" color="text.secondary" sx={{ mx: 4 }}>
|
15 |
-
© 2024
|
16 |
-{" "}
|
17 |
<Link
|
18 |
-
href="https://
|
19 |
target="_blank"
|
20 |
rel="noopener noreferrer"
|
21 |
color="inherit"
|
22 |
>
|
23 |
-
|
24 |
</Link>
|
25 |
</Typography>
|
26 |
</Box>
|
|
|
12 |
}}
|
13 |
>
|
14 |
<Typography variant="body2" color="text.secondary" sx={{ mx: 4 }}>
|
15 |
+
© 2024 The Fin AI - Open Greek Financial LLM Leaderboard - Made by the Fin AI community and based on 🤗.
|
16 |
-{" "}
|
17 |
<Link
|
18 |
+
href="https://thefin.ai"
|
19 |
target="_blank"
|
20 |
rel="noopener noreferrer"
|
21 |
color="inherit"
|
22 |
>
|
23 |
+
thefin.ai
|
24 |
</Link>
|
25 |
</Typography>
|
26 |
</Box>
|
frontend/src/components/Navigation/Navigation.js
CHANGED
@@ -292,36 +292,6 @@ const Navigation = ({ onToggleTheme, mode }) => {
|
|
292 |
External links
|
293 |
</Typography>
|
294 |
</Box>
|
295 |
-
<MenuItem
|
296 |
-
component={MuiLink}
|
297 |
-
href="https://huggingface.co/spaces/open-llm-leaderboard/comparator"
|
298 |
-
target="_blank"
|
299 |
-
sx={{
|
300 |
-
"& svg": {
|
301 |
-
ml: "auto",
|
302 |
-
fontSize: "0.875rem",
|
303 |
-
opacity: 0.6,
|
304 |
-
},
|
305 |
-
}}
|
306 |
-
>
|
307 |
-
Compare models
|
308 |
-
<OpenInNewIcon />
|
309 |
-
</MenuItem>
|
310 |
-
<MenuItem
|
311 |
-
component={MuiLink}
|
312 |
-
href="https://huggingface.co/docs/leaderboards/open_llm_leaderboard/about"
|
313 |
-
target="_blank"
|
314 |
-
sx={{
|
315 |
-
"& svg": {
|
316 |
-
ml: "auto",
|
317 |
-
fontSize: "0.875rem",
|
318 |
-
opacity: 0.6,
|
319 |
-
},
|
320 |
-
}}
|
321 |
-
>
|
322 |
-
About
|
323 |
-
<OpenInNewIcon />
|
324 |
-
</MenuItem>
|
325 |
</Menu>
|
326 |
|
327 |
<Tooltip
|
|
|
292 |
External links
|
293 |
</Typography>
|
294 |
</Box>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
295 |
</Menu>
|
296 |
|
297 |
<Tooltip
|
frontend/src/pages/LeaderboardPage/LeaderboardPage.js
CHANGED
@@ -33,7 +33,7 @@ function LeaderboardPage() {
|
|
33 |
<Logo height="80px" />
|
34 |
</Box>
|
35 |
<PageHeader
|
36 |
-
title="Open LLM Leaderboard"
|
37 |
subtitle={
|
38 |
<>
|
39 |
Comparing Large Language Models in an{" "}
|
|
|
33 |
<Logo height="80px" />
|
34 |
</Box>
|
35 |
<PageHeader
|
36 |
+
title="Open Greek Financial LLM Leaderboard"
|
37 |
subtitle={
|
38 |
<>
|
39 |
Comparing Large Language Models in an{" "}
|
frontend/src/pages/QuotePage/QuotePage.js
CHANGED
@@ -13,15 +13,14 @@ import PageHeader from "../../components/shared/PageHeader";
|
|
13 |
|
14 |
const citations = [
|
15 |
{
|
16 |
-
title: "
|
17 |
authors:
|
18 |
-
"
|
19 |
-
citation: `@
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
howpublished = "\\url{https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard}",
|
25 |
}`,
|
26 |
type: "main",
|
27 |
},
|
@@ -44,15 +43,21 @@ const citations = [
|
|
44 |
|
45 |
const priorWork = [
|
46 |
{
|
47 |
-
title: "
|
48 |
authors:
|
49 |
-
"
|
50 |
-
citation: `@
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
56 |
}`,
|
57 |
type: "main",
|
58 |
},
|
@@ -62,86 +67,27 @@ const benchmarks = [
|
|
62 |
{
|
63 |
title: "MultiFin: Instruction-Following Evaluation",
|
64 |
authors: "Zhou et al.",
|
65 |
-
citation: `@
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
73 |
}`,
|
74 |
-
url: "https://
|
75 |
-
},
|
76 |
-
{
|
77 |
-
title: "BBH: Big-Bench Hard",
|
78 |
-
authors: "Suzgun et al.",
|
79 |
-
citation: `@misc{suzgun2022challengingbigbenchtaskschainofthought,
|
80 |
-
title={Challenging BIG-Bench Tasks and Whether Chain-of-Thought Can Solve Them},
|
81 |
-
author={Mirac Suzgun and Nathan Scales and Nathanael Schärli and Sebastian Gehrmann and Yi Tay and Hyung Won Chung and Aakanksha Chowdhery and Quoc V. Le and Ed H. Chi and Denny Zhou and Jason Wei},
|
82 |
-
year={2022},
|
83 |
-
eprint={2210.09261},
|
84 |
-
archivePrefix={arXiv},
|
85 |
-
primaryClass={cs.CL},
|
86 |
-
url={https://arxiv.org/abs/2210.09261},
|
87 |
-
}`,
|
88 |
-
url: "https://arxiv.org/abs/2210.09261",
|
89 |
-
},
|
90 |
-
{
|
91 |
-
title: "MATH: Mathematics Aptitude Test of Heuristics - Level 5",
|
92 |
-
authors: "Hendrycks et al.",
|
93 |
-
citation: `@misc{hendrycks2021measuringmathematicalproblemsolving,
|
94 |
-
title={Measuring Mathematical Problem Solving With the MATH Dataset},
|
95 |
-
author={Dan Hendrycks and Collin Burns and Saurav Kadavath and Akul Arora and Steven Basart and Eric Tang and Dawn Song and Jacob Steinhardt},
|
96 |
-
year={2021},
|
97 |
-
eprint={2103.03874},
|
98 |
-
archivePrefix={arXiv},
|
99 |
-
primaryClass={cs.LG},
|
100 |
-
url={https://arxiv.org/abs/2103.03874},
|
101 |
-
}`,
|
102 |
-
url: "https://arxiv.org/abs/2103.03874",
|
103 |
-
},
|
104 |
-
{
|
105 |
-
title: "GPQA: Graduate-Level Google-Proof Q&A",
|
106 |
-
authors: "Rein et al.",
|
107 |
-
citation: `@misc{rein2023gpqagraduatelevelgoogleproofqa,
|
108 |
-
title={GPQA: A Graduate-Level Google-Proof Q&A Benchmark},
|
109 |
-
author={David Rein and Betty Li Hou and Asa Cooper Stickland and Jackson Petty and Richard Yuanzhe Pang and Julien Dirani and Julian Michael and Samuel R. Bowman},
|
110 |
-
year={2023},
|
111 |
-
eprint={2311.12022},
|
112 |
-
archivePrefix={arXiv},
|
113 |
-
primaryClass={cs.AI},
|
114 |
-
url={https://arxiv.org/abs/2311.12022},
|
115 |
-
}`,
|
116 |
-
url: "https://arxiv.org/abs/2311.12022",
|
117 |
-
},
|
118 |
-
{
|
119 |
-
title: "MuSR: Multistep Soft Reasoning",
|
120 |
-
authors: "Sprague et al.",
|
121 |
-
citation: `@misc{sprague2024musrtestinglimitschainofthought,
|
122 |
-
title={MuSR: Testing the Limits of Chain-of-thought with Multistep Soft Reasoning},
|
123 |
-
author={Zayne Sprague and Xi Ye and Kaj Bostrom and Swarat Chaudhuri and Greg Durrett},
|
124 |
-
year={2024},
|
125 |
-
eprint={2310.16049},
|
126 |
-
archivePrefix={arXiv},
|
127 |
-
primaryClass={cs.CL},
|
128 |
-
url={https://arxiv.org/abs/2310.16049},
|
129 |
-
}`,
|
130 |
-
url: "https://arxiv.org/abs/2310.16049",
|
131 |
-
},
|
132 |
-
{
|
133 |
-
title: "MMLU-Pro: Massive Multitask Language Understanding Professional",
|
134 |
-
authors: "Wang et al.",
|
135 |
-
citation: `@misc{wang2024mmluprorobustchallengingmultitask,
|
136 |
-
title={MMLU-Pro: A More Robust and Challenging Multi-Task Language Understanding Benchmark},
|
137 |
-
author={Yubo Wang and Xueguang Ma and Ge Zhang and Yuansheng Ni and Abhranil Chandra and Shiguang Guo and Weiming Ren and Aaran Arulraj and Xuan He and Ziyan Jiang and Tianle Li and Max Ku and Kai Wang and Alex Zhuang and Rongqi Fan and Xiang Yue and Wenhu Chen},
|
138 |
-
year={2024},
|
139 |
-
eprint={2406.01574},
|
140 |
-
archivePrefix={arXiv},
|
141 |
-
primaryClass={cs.CL},
|
142 |
-
url={https://arxiv.org/abs/2406.01574},
|
143 |
-
}`,
|
144 |
-
url: "https://arxiv.org/abs/2406.01574",
|
145 |
},
|
146 |
];
|
147 |
|
@@ -229,7 +175,7 @@ function QuotePage() {
|
|
229 |
<Box sx={{ width: "100%", maxWidth: 1200, margin: "0 auto", padding: 4 }}>
|
230 |
<PageHeader
|
231 |
title="Citation Information"
|
232 |
-
subtitle="How to cite the Open LLM Leaderboard in your work"
|
233 |
/>
|
234 |
|
235 |
<Alert severity="info" sx={{ mb: 4 }}>
|
|
|
13 |
|
14 |
const citations = [
|
15 |
{
|
16 |
+
title: "FinBen: A Holistic Financial Benchmark for Large Language Models",
|
17 |
authors:
|
18 |
+
"Qianqian Xie et al.",
|
19 |
+
citation: `@article{xie2024finben,
|
20 |
+
title={The finben: An holistic financial benchmark for large language models},
|
21 |
+
author={Xie, Qianqian and Han, Weiguang and Chen, Zhengyu and Xiang, Ruoyu and Zhang, Xiao and He, Yueru and Xiao, Mengxi and Li, Dong and Dai, Yongfu and Feng, Duanyu and others},
|
22 |
+
journal={arXiv preprint arXiv:2402.12659},
|
23 |
+
year={2024}
|
|
|
24 |
}`,
|
25 |
type: "main",
|
26 |
},
|
|
|
43 |
|
44 |
const priorWork = [
|
45 |
{
|
46 |
+
title: "PIXIU: a large language model, instruction data and evaluation benchmark for finance",
|
47 |
authors:
|
48 |
+
"Qianqian Xie et al.",
|
49 |
+
citation: `@inproceedings{10.5555/3666122.3667576,
|
50 |
+
author = {Xie, Qianqian and Han, Weiguang and Zhang, Xiao and Lai, Yanzhao and Peng, Min and Lopez-Lira, Alejandro and Huang, Jimin},
|
51 |
+
title = {PIXIU: a large language model, instruction data and evaluation benchmark for finance},
|
52 |
+
year = {2024},
|
53 |
+
publisher = {Curran Associates Inc.},
|
54 |
+
address = {Red Hook, NY, USA},
|
55 |
+
abstract = {Although large language models (LLMs) have shown great performance in natural language processing (NLP) in the financial domain, there are no publicly available financially tailored LLMs, instruction tuning datasets, and evaluation benchmarks, which is critical for continually pushing forward the open-source development of financial artificial intelligence (AI). This paper introduces PIXIU, a comprehensive framework including the first financial LLM based on fine-tuning LLaMA with instruction data, the first instruction data with 128K data samples to support the fine-tuning, and an evaluation benchmark with 8 tasks and 15 datasets. We first construct the large-scale multi-task instruction data considering a variety of financial tasks, financial document types, and financial data modalities. We then propose a financial LLM called FinMA by fine-tuning LLaMA with the constructed dataset to be able to follow instructions for various financial tasks. To support the evaluation of financial LLMs, we propose a standardized benchmark that covers a set of critical financial tasks, including six financial NLP tasks and two financial prediction tasks. With this benchmark, we conduct a detailed analysis of FinMA and several existing LLMs, uncovering their strengths and weaknesses in handling critical financial tasks. The model, datasets, benchmark, and experimental results are open-sourced to facilitate future research in financial AI.},
|
56 |
+
booktitle = {Proceedings of the 37th International Conference on Neural Information Processing Systems},
|
57 |
+
articleno = {1454},
|
58 |
+
numpages = {16},
|
59 |
+
location = {New Orleans, LA, USA},
|
60 |
+
series = {NIPS '23}
|
61 |
}`,
|
62 |
type: "main",
|
63 |
},
|
|
|
67 |
{
|
68 |
title: "MultiFin: Instruction-Following Evaluation",
|
69 |
authors: "Zhou et al.",
|
70 |
+
citation: `@inproceedings{jorgensen-etal-2023-multifin,
|
71 |
+
title = "{M}ulti{F}in: A Dataset for Multilingual Financial {NLP}",
|
72 |
+
author = "J{\o}rgensen, Rasmus and
|
73 |
+
Brandt, Oliver and
|
74 |
+
Hartmann, Mareike and
|
75 |
+
Dai, Xiang and
|
76 |
+
Igel, Christian and
|
77 |
+
Elliott, Desmond",
|
78 |
+
editor = "Vlachos, Andreas and
|
79 |
+
Augenstein, Isabelle",
|
80 |
+
booktitle = "Findings of the Association for Computational Linguistics: EACL 2023",
|
81 |
+
month = may,
|
82 |
+
year = "2023",
|
83 |
+
address = "Dubrovnik, Croatia",
|
84 |
+
publisher = "Association for Computational Linguistics",
|
85 |
+
url = "https://aclanthology.org/2023.findings-eacl.66/",
|
86 |
+
doi = "10.18653/v1/2023.findings-eacl.66",
|
87 |
+
pages = "894--909",
|
88 |
+
abstract = "Financial information is generated and distributed across the world, resulting in a vast amount of domain-specific multilingual data. Multilingual models adapted to the financial domain would ease deployment when an organization needs to work with multiple languages on a regular basis. For the development and evaluation of such models, there is a need for multilingual financial language processing datasets. We describe MultiFin {--} a publicly available financial dataset consisting of real-world article headlines covering 15 languages across different writing systems and language families. The dataset consists of hierarchical label structure providing two classification tasks: multi-label and multi-class. We develop our annotation schema based on a real-world application and annotate our dataset using both {\textquoteleft}label by native-speaker' and {\textquoteleft}translate-then-label' approaches. The evaluation of several popular multilingual models, e.g., mBERT, XLM-R, and mT5, show that although decent accuracy can be achieved in high-resource languages, there is substantial room for improvement in low-resource languages."
|
89 |
}`,
|
90 |
+
url: "https://aclanthology.org/2023.findings-eacl.66/#:~:text=We%20describe%20MultiFin%20%2D%2D%20a,%2Dlabel%20and%20multi%2Dclass.",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
91 |
},
|
92 |
];
|
93 |
|
|
|
175 |
<Box sx={{ width: "100%", maxWidth: 1200, margin: "0 auto", padding: 4 }}>
|
176 |
<PageHeader
|
177 |
title="Citation Information"
|
178 |
+
subtitle="How to cite the Open Greek Financial LLM Leaderboard in your work"
|
179 |
/>
|
180 |
|
181 |
<Alert severity="info" sx={{ mb: 4 }}>
|