Open-Greek-Financial-LLM-Leaderboard

Running

App Files Community

Jimin Huang committed on Jan 9

Commit

15f2c01

1 Parent(s): e3acf44

fix: alter column name

Browse files

Files changed (7) hide show

backend/app/asgi.py +3 -3
backend/app/main.py +2 -2
frontend/public/index.html +8 -8
frontend/src/components/Footer/Footer.js +3 -3
frontend/src/components/Navigation/Navigation.js +0 -30
frontend/src/pages/LeaderboardPage/LeaderboardPage.js +1 -1
frontend/src/pages/QuotePage/QuotePage.js +42 -96

backend/app/asgi.py CHANGED Viewed

@@ -1,5 +1,5 @@
 """
-ASGI entry point for the Open LLM Leaderboard API.
 """
 import os
 import uvicorn
@@ -65,7 +65,7 @@ logger = logging.getLogger("app")
 # Create FastAPI application
 app = FastAPI(
-    title="Open LLM Leaderboard",
     version="1.0.0",
     docs_url="/docs",
 )
@@ -103,4 +103,4 @@ async def startup_event():
     # Setup cache
     setup_cache()
-    logger.info(LogFormatter.success("FastAPI Cache initialized with in-memory backend"))

 """
+ASGI entry point for the Open Greek Financial LLM Leaderboard API.
 """
 import os
 import uvicorn
 # Create FastAPI application
 app = FastAPI(
+    title="Open Greek Financial LLM Leaderboard",
     version="1.0.0",
     docs_url="/docs",
 )
     # Setup cache
     setup_cache()
+    logger.info(LogFormatter.success("FastAPI Cache initialized with in-memory backend"))

backend/app/main.py CHANGED Viewed

@@ -6,7 +6,7 @@ import logging
 setup_logging()
 logger = logging.getLogger(__name__)
-app = FastAPI(title="Open LLM Leaderboard API")
 @app.on_event("startup")
 async def startup_event():
@@ -15,4 +15,4 @@ async def startup_event():
 # Import and include routers after app initialization
 from app.api import models, votes
 app.include_router(models.router, prefix="/api", tags=["models"])
-app.include_router(votes.router, prefix="/api", tags=["votes"])

 setup_logging()
 logger = logging.getLogger(__name__)
+app = FastAPI(title="Open Greek Financial LLM Leaderboard API")
 @app.on_event("startup")
 async def startup_event():
 # Import and include routers after app initialization
 from app.api import models, votes
 app.include_router(models.router, prefix="/api", tags=["models"])
+app.include_router(votes.router, prefix="/api", tags=["votes"])

frontend/public/index.html CHANGED Viewed

@@ -9,22 +9,22 @@
     />
     <meta
       name="description"
-      content="Interactive leaderboard tracking and comparing open-source Large Language Models across multiple benchmarks: IFEval, BBH, MATH, GPQA, MUSR, and MMLU-PRO."
     />
     <!-- Open Graph / Facebook -->
     <meta property="og:type" content="website" />
     <meta
       property="og:url"
-      content="https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard"
     />
     <meta
       property="og:title"
-      content="Open LLM Leaderboard - Compare Open Source Large Language Models"
     />
     <meta
       property="og:description"
-      content="Interactive leaderboard for comparing LLM performance across multiple benchmarks. Features real-time filtering, community voting, and comprehensive model analysis with benchmarks like IFEval, BBH, MATH, GPQA, MUSR, and MMLU-PRO."
     />
     <meta property="og:image" content="%PUBLIC_URL%/og-image.png" />
@@ -32,15 +32,15 @@
     <meta property="twitter:card" content="summary_large_image" />
     <meta
       property="twitter:url"
-      content="https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard"
     />
     <meta
       property="twitter:title"
-      content="Open LLM Leaderboard - Compare Open Source Large Language Models"
     />
     <meta
       property="twitter:description"
-      content="Interactive leaderboard for comparing LLM performance across multiple benchmarks. Features real-time filtering, community voting, and comprehensive model analysis with benchmarks like IFEval, BBH, MATH, GPQA, MUSR, and MMLU-PRO."
     />
     <meta property="twitter:image" content="%PUBLIC_URL%/og-image.png" />
     <!--
@@ -53,7 +53,7 @@
       Learn how to configure a non-root public URL by running `npm run build`.
     -->
     <title>
-      Open LLM Leaderboard - Compare Open Source Large Language Models
     </title>
     <link
       href="https://fonts.googleapis.com/css2?family=Source+Sans+Pro:wght@400;600;700&display=swap"

     />
     <meta
       name="description"
+      content="Interactive leaderboard for comparing LLM performance across Greek financial benchmarks."
     />
     <!-- Open Graph / Facebook -->
     <meta property="og:type" content="website" />
     <meta
       property="og:url"
+      content="https://huggingface.co/spaces/TheFinAI/open_greek_finance_llm_leaderboard"
     />
     <meta
       property="og:title"
+      content="Open Greek Financial LLM Leaderboard - Compare Large Language Models in Greek Financial Area"
     />
     <meta
       property="og:description"
+      content="Interactive leaderboard for comparing LLM performance across Greek financial benchmarks."
     />
     <meta property="og:image" content="%PUBLIC_URL%/og-image.png" />
     <meta property="twitter:card" content="summary_large_image" />
     <meta
       property="twitter:url"
+      content="https://huggingface.co/spaces/TheFinAI/open_greek_finance_llm_leaderboard"
     />
     <meta
       property="twitter:title"
+      content="Open Greek Financial LLM Leaderboard - Compare Large Language Models in Greek Financial Area"
     />
     <meta
       property="twitter:description"
+      content="Interactive leaderboard for comparing LLM performance across Greek financial benchmarks."
     />
     <meta property="twitter:image" content="%PUBLIC_URL%/og-image.png" />
     <!--
       Learn how to configure a non-root public URL by running `npm run build`.
     -->
     <title>
+      Open Greek Financial LLM Leaderboard - Compare Large Language Models in Greek Financial Area
     </title>
     <link
       href="https://fonts.googleapis.com/css2?family=Source+Sans+Pro:wght@400;600;700&display=swap"

frontend/src/components/Footer/Footer.js CHANGED Viewed

@@ -12,15 +12,15 @@ const Footer = () => {
       }}
     >
       <Typography variant="body2" color="text.secondary" sx={{ mx: 4 }}>
-        © 2024 Hugging Face - Open LLM Leaderboard - Made with 🤗 by the HF team
         -{" "}
         <Link
-          href="https://huggingface.co"
           target="_blank"
           rel="noopener noreferrer"
           color="inherit"
         >
-          huggingface.co
         </Link>
       </Typography>
     </Box>

       }}
     >
       <Typography variant="body2" color="text.secondary" sx={{ mx: 4 }}>
+        © 2024 The Fin AI - Open Greek Financial LLM Leaderboard - Made by the Fin AI community and based on 🤗.
         -{" "}
         <Link
+          href="https://thefin.ai"
           target="_blank"
           rel="noopener noreferrer"
           color="inherit"
         >
+          thefin.ai
         </Link>
       </Typography>
     </Box>

frontend/src/components/Navigation/Navigation.js CHANGED Viewed

@@ -292,36 +292,6 @@ const Navigation = ({ onToggleTheme, mode }) => {
                   External links
                 </Typography>
               </Box>
-              <MenuItem
-                component={MuiLink}
-                href="https://huggingface.co/spaces/open-llm-leaderboard/comparator"
-                target="_blank"
-                sx={{
-                  "& svg": {
-                    ml: "auto",
-                    fontSize: "0.875rem",
-                    opacity: 0.6,
-                  },
-                }}
-              >
-                Compare models
-                <OpenInNewIcon />
-              </MenuItem>
-              <MenuItem
-                component={MuiLink}
-                href="https://huggingface.co/docs/leaderboards/open_llm_leaderboard/about"
-                target="_blank"
-                sx={{
-                  "& svg": {
-                    ml: "auto",
-                    fontSize: "0.875rem",
-                    opacity: 0.6,
-                  },
-                }}
-              >
-                About
-                <OpenInNewIcon />
-              </MenuItem>
             </Menu>
             <Tooltip

                   External links
                 </Typography>
               </Box>
             </Menu>
             <Tooltip

frontend/src/pages/LeaderboardPage/LeaderboardPage.js CHANGED Viewed

@@ -33,7 +33,7 @@ function LeaderboardPage() {
         <Logo height="80px" />
       </Box>
       <PageHeader
-        title="Open LLM Leaderboard"
         subtitle={
           <>
             Comparing Large Language Models in an{" "}

         <Logo height="80px" />
       </Box>
       <PageHeader
+        title="Open Greek Financial LLM Leaderboard"
         subtitle={
           <>
             Comparing Large Language Models in an{" "}

frontend/src/pages/QuotePage/QuotePage.js CHANGED Viewed

@@ -13,15 +13,14 @@ import PageHeader from "../../components/shared/PageHeader";
 const citations = [
   {
-    title: "Open LLM Leaderboard v2",
     authors:
-      "Clémentine Fourrier, Nathan Habib, Alina Lozovskaya, Konrad Szafer, Thomas Wolf",
-    citation: `@misc{open-llm-leaderboard-v2,
-  author = {Clémentine Fourrier and Nathan Habib and Alina Lozovskaya and Konrad Szafer and Thomas Wolf},
-  title = {Open LLM Leaderboard v2},
-  year = {2024},
-  publisher = {Hugging Face},
-  howpublished = "\\url{https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard}",
 }`,
     type: "main",
   },
@@ -44,15 +43,21 @@ const citations = [
 const priorWork = [
   {
-    title: "Open LLM Leaderboard v1",
     authors:
-      "Edward Beeching, Clémentine Fourrier, Nathan Habib, Sheon Han, Nathan Lambert, Nazneen Rajani, Omar Sanseviero, Lewis Tunstall, Thomas Wolf",
-    citation: `@misc{open-llm-leaderboard-v1,
-  author = {Edward Beeching and Clémentine Fourrier and Nathan Habib and Sheon Han and Nathan Lambert and Nazneen Rajani and Omar Sanseviero and Lewis Tunstall and Thomas Wolf},
-  title = {Open LLM Leaderboard (2023-2024)},
-  year = {2023},
-  publisher = {Hugging Face},
-  howpublished = "\\url{https://huggingface.co/spaces/open-llm-leaderboard-old/open_llm_leaderboard}"
 }`,
     type: "main",
   },
@@ -62,86 +67,27 @@ const benchmarks = [
   {
     title: "MultiFin: Instruction-Following Evaluation",
     authors: "Zhou et al.",
-    citation: `@misc{zhou2023instructionfollowingevaluationlargelanguage,
-  title={Instruction-Following Evaluation for Large Language Models},
-  author={Jeffrey Zhou and Tianjian Lu and Swaroop Mishra and Siddhartha Brahma and Sujoy Basu and Yi Luan and Denny Zhou and Le Hou},
-  year={2023},
-  eprint={2311.07911},
-  archivePrefix={arXiv},
-  primaryClass={cs.CL},
-  url={https://arxiv.org/abs/2311.07911},
 }`,
-    url: "https://arxiv.org/abs/2311.07911",
-  },
-  {
-    title: "BBH: Big-Bench Hard",
-    authors: "Suzgun et al.",
-    citation: `@misc{suzgun2022challengingbigbenchtaskschainofthought,
-  title={Challenging BIG-Bench Tasks and Whether Chain-of-Thought Can Solve Them},
-  author={Mirac Suzgun and Nathan Scales and Nathanael Schärli and Sebastian Gehrmann and Yi Tay and Hyung Won Chung and Aakanksha Chowdhery and Quoc V. Le and Ed H. Chi and Denny Zhou and Jason Wei},
-  year={2022},
-  eprint={2210.09261},
-  archivePrefix={arXiv},
-  primaryClass={cs.CL},
-  url={https://arxiv.org/abs/2210.09261},
-}`,
-    url: "https://arxiv.org/abs/2210.09261",
-  },
-  {
-    title: "MATH: Mathematics Aptitude Test of Heuristics - Level 5",
-    authors: "Hendrycks et al.",
-    citation: `@misc{hendrycks2021measuringmathematicalproblemsolving,
-  title={Measuring Mathematical Problem Solving With the MATH Dataset},
-  author={Dan Hendrycks and Collin Burns and Saurav Kadavath and Akul Arora and Steven Basart and Eric Tang and Dawn Song and Jacob Steinhardt},
-  year={2021},
-  eprint={2103.03874},
-  archivePrefix={arXiv},
-  primaryClass={cs.LG},
-  url={https://arxiv.org/abs/2103.03874},
-}`,
-    url: "https://arxiv.org/abs/2103.03874",
-  },
-  {
-    title: "GPQA: Graduate-Level Google-Proof Q&A",
-    authors: "Rein et al.",
-    citation: `@misc{rein2023gpqagraduatelevelgoogleproofqa,
-  title={GPQA: A Graduate-Level Google-Proof Q&A Benchmark},
-  author={David Rein and Betty Li Hou and Asa Cooper Stickland and Jackson Petty and Richard Yuanzhe Pang and Julien Dirani and Julian Michael and Samuel R. Bowman},
-  year={2023},
-  eprint={2311.12022},
-  archivePrefix={arXiv},
-  primaryClass={cs.AI},
-  url={https://arxiv.org/abs/2311.12022},
-}`,
-    url: "https://arxiv.org/abs/2311.12022",
-  },
-  {
-    title: "MuSR: Multistep Soft Reasoning",
-    authors: "Sprague et al.",
-    citation: `@misc{sprague2024musrtestinglimitschainofthought,
-  title={MuSR: Testing the Limits of Chain-of-thought with Multistep Soft Reasoning},
-  author={Zayne Sprague and Xi Ye and Kaj Bostrom and Swarat Chaudhuri and Greg Durrett},
-  year={2024},
-  eprint={2310.16049},
-  archivePrefix={arXiv},
-  primaryClass={cs.CL},
-  url={https://arxiv.org/abs/2310.16049},
-}`,
-    url: "https://arxiv.org/abs/2310.16049",
-  },
-  {
-    title: "MMLU-Pro: Massive Multitask Language Understanding Professional",
-    authors: "Wang et al.",
-    citation: `@misc{wang2024mmluprorobustchallengingmultitask,
-  title={MMLU-Pro: A More Robust and Challenging Multi-Task Language Understanding Benchmark},
-  author={Yubo Wang and Xueguang Ma and Ge Zhang and Yuansheng Ni and Abhranil Chandra and Shiguang Guo and Weiming Ren and Aaran Arulraj and Xuan He and Ziyan Jiang and Tianle Li and Max Ku and Kai Wang and Alex Zhuang and Rongqi Fan and Xiang Yue and Wenhu Chen},
-  year={2024},
-  eprint={2406.01574},
-  archivePrefix={arXiv},
-  primaryClass={cs.CL},
-  url={https://arxiv.org/abs/2406.01574},
-}`,
-    url: "https://arxiv.org/abs/2406.01574",
   },
 ];
@@ -229,7 +175,7 @@ function QuotePage() {
     <Box sx={{ width: "100%", maxWidth: 1200, margin: "0 auto", padding: 4 }}>
       <PageHeader
         title="Citation Information"
-        subtitle="How to cite the Open LLM Leaderboard in your work"
       />
       <Alert severity="info" sx={{ mb: 4 }}>

 const citations = [
   {
+    title: "FinBen: A Holistic Financial Benchmark for Large Language Models",
     authors:
+      "Qianqian Xie et al.",
+    citation: `@article{xie2024finben,
+  title={The finben: An holistic financial benchmark for large language models},
+  author={Xie, Qianqian and Han, Weiguang and Chen, Zhengyu and Xiang, Ruoyu and Zhang, Xiao and He, Yueru and Xiao, Mengxi and Li, Dong and Dai, Yongfu and Feng, Duanyu and others},
+  journal={arXiv preprint arXiv:2402.12659},
+  year={2024}
 }`,
     type: "main",
   },
 const priorWork = [
   {
+    title: "PIXIU: a large language model, instruction data and evaluation benchmark for finance",
     authors:
+      "Qianqian Xie et al.",
+    citation: `@inproceedings{10.5555/3666122.3667576,
+author = {Xie, Qianqian and Han, Weiguang and Zhang, Xiao and Lai, Yanzhao and Peng, Min and Lopez-Lira, Alejandro and Huang, Jimin},
+title = {PIXIU: a large language model, instruction data and evaluation benchmark for finance},
+year = {2024},
+publisher = {Curran Associates Inc.},
+address = {Red Hook, NY, USA},
+abstract = {Although large language models (LLMs) have shown great performance in natural language processing (NLP) in the financial domain, there are no publicly available financially tailored LLMs, instruction tuning datasets, and evaluation benchmarks, which is critical for continually pushing forward the open-source development of financial artificial intelligence (AI). This paper introduces PIXIU, a comprehensive framework including the first financial LLM based on fine-tuning LLaMA with instruction data, the first instruction data with 128K data samples to support the fine-tuning, and an evaluation benchmark with 8 tasks and 15 datasets. We first construct the large-scale multi-task instruction data considering a variety of financial tasks, financial document types, and financial data modalities. We then propose a financial LLM called FinMA by fine-tuning LLaMA with the constructed dataset to be able to follow instructions for various financial tasks. To support the evaluation of financial LLMs, we propose a standardized benchmark that covers a set of critical financial tasks, including six financial NLP tasks and two financial prediction tasks. With this benchmark, we conduct a detailed analysis of FinMA and several existing LLMs, uncovering their strengths and weaknesses in handling critical financial tasks. The model, datasets, benchmark, and experimental results are open-sourced to facilitate future research in financial AI.},
+booktitle = {Proceedings of the 37th International Conference on Neural Information Processing Systems},
+articleno = {1454},
+numpages = {16},
+location = {New Orleans, LA, USA},
+series = {NIPS '23}
 }`,
     type: "main",
   },
   {
     title: "MultiFin: Instruction-Following Evaluation",
     authors: "Zhou et al.",
+    citation: `@inproceedings{jorgensen-etal-2023-multifin,
+    title = "{M}ulti{F}in: A Dataset for Multilingual Financial {NLP}",
+    author = "J{\o}rgensen, Rasmus  and
+      Brandt, Oliver  and
+      Hartmann, Mareike  and
+      Dai, Xiang  and
+      Igel, Christian  and
+      Elliott, Desmond",
+    editor = "Vlachos, Andreas  and
+      Augenstein, Isabelle",
+    booktitle = "Findings of the Association for Computational Linguistics: EACL 2023",
+    month = may,
+    year = "2023",
+    address = "Dubrovnik, Croatia",
+    publisher = "Association for Computational Linguistics",
+    url = "https://aclanthology.org/2023.findings-eacl.66/",
+    doi = "10.18653/v1/2023.findings-eacl.66",
+    pages = "894--909",
+    abstract = "Financial information is generated and distributed across the world, resulting in a vast amount of domain-specific multilingual data. Multilingual models adapted to the financial domain would ease deployment when an organization needs to work with multiple languages on a regular basis. For the development and evaluation of such models, there is a need for multilingual financial language processing datasets. We describe MultiFin {--} a publicly available financial dataset consisting of real-world article headlines covering 15 languages across different writing systems and language families. The dataset consists of hierarchical label structure providing two classification tasks: multi-label and multi-class. We develop our annotation schema based on a real-world application and annotate our dataset using both {\textquoteleft}label by native-speaker' and {\textquoteleft}translate-then-label' approaches. The evaluation of several popular multilingual models, e.g., mBERT, XLM-R, and mT5, show that although decent accuracy can be achieved in high-resource languages, there is substantial room for improvement in low-resource languages."
 }`,
+    url: "https://aclanthology.org/2023.findings-eacl.66/#:~:text=We%20describe%20MultiFin%20%2D%2D%20a,%2Dlabel%20and%20multi%2Dclass.",
   },
 ];
     <Box sx={{ width: "100%", maxWidth: 1200, margin: "0 auto", padding: 4 }}>
       <PageHeader
         title="Citation Information"
+        subtitle="How to cite the Open Greek Financial LLM Leaderboard in your work"
       />
       <Alert severity="info" sx={{ mb: 4 }}>