Spaces:

llm-jp
/

open-japanese-llm-leaderboard

Running on CPU Upgrade

App Files Files Community

shigeki Ishida committed on Nov 1, 2024

Commit

67542c9

1 Parent(s): b4dce55

Add parquet file support

Browse files

Files changed (3) hide show

app.py +5 -1
src/leaderboard/read_evals.py +63 -33
src/populate.py +0 -1

app.py CHANGED Viewed

@@ -89,7 +89,11 @@ except Exception:
     FAILED_EVAL_QUEUE_DF,
 ) = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)
-ORIGINAL_DF = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS)
 MAX_MODEL_SIZE = ORIGINAL_DF["#Params (B)"].max()

     FAILED_EVAL_QUEUE_DF,
 ) = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)
+# ORIGINAL_DF = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS)
+# Get dataframes
+results_path = "eval-results/leaderboard.parquet"
+ORIGINAL_DF = get_leaderboard_df(results_path, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS)
 MAX_MODEL_SIZE = ORIGINAL_DF["#Params (B)"].max()

src/leaderboard/read_evals.py CHANGED Viewed

@@ -5,6 +5,7 @@ from dataclasses import dataclass
 from decimal import Decimal
 import dateutil
 from src.display.formatting import make_clickable_model
 from src.display.utils import AutoEvalColumn, Backend, ModelType, Tasks, Version, WeightType
@@ -37,9 +38,12 @@ class EvalResult:
     @classmethod
     def init_from_json_file(self, json_filepath):
-        """Inits the result from the specific model result file"""
-        with open(json_filepath) as fp:
-            data = json.load(fp)
         config = data.get("config")
         metainfo = config.get("metainfo", {})
@@ -183,35 +187,63 @@ def get_request_file_for_model(requests_path, model_name, precision):
 def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResult]:
-    """From the path of the results folder root, extract all needed info for results"""
-    model_result_filepaths = []
-    for root, _, files in os.walk(results_path):
-        # We should only have json files in model results
-        if len(files) == 0 or any([not f.endswith(".json") for f in files]):
-            continue
-        # Sort the files by date
-        try:
-            files.sort(key=lambda x: x.removesuffix(".json").removeprefix("results_")[:-7])
-        except dateutil.parser._parser.ParserError:
-            files = [files[-1]]
-        for file in files:
-            model_result_filepaths.append(os.path.join(root, file))
     eval_results = {}
-    for model_result_filepath in model_result_filepaths:
-        # Creation of result
-        eval_result = EvalResult.init_from_json_file(model_result_filepath)
-        eval_result.update_with_request_file(requests_path)
-        # Store results of same eval together
-        eval_name = eval_result.eval_name
-        if eval_name in eval_results.keys():
-            eval_results[eval_name].results.update({k: v for k, v in eval_result.results.items() if v is not None})
-        else:
-            eval_results[eval_name] = eval_result
     results = []
     for v in eval_results.values():
@@ -220,7 +252,5 @@ def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResu
             results.append(v)
         except KeyError:  # not all eval values present
             continue
-    # print(f"Processing file: {model_result_filepath}")
-    # print(f"Eval result: {eval_result.to_dict()}")
     return results

 from decimal import Decimal
 import dateutil
+import pandas as pd
 from src.display.formatting import make_clickable_model
 from src.display.utils import AutoEvalColumn, Backend, ModelType, Tasks, Version, WeightType
     @classmethod
     def init_from_json_file(self, json_filepath):
+        """Inits the result from the specific model result file or dict"""
+        if isinstance(json_filepath, dict):
+            data = json_filepath
+        else:
+            with open(json_filepath) as fp:
+                data = json.load(fp)
         config = data.get("config")
         metainfo = config.get("metainfo", {})
 def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResult]:
+    """From the path of the results folder root or parquet file, extract all needed info for results"""
     eval_results = {}
+    if results_path.endswith(".parquet"):
+        df = pd.read_parquet(results_path)
+        for _, row in df.iterrows():
+            data = {
+                "scores": {
+                    col.replace("scores.", ""): str(row[col]) for col in df.columns if col.startswith("scores.")
+                },
+                "config": {
+                    "model_name": row.get("config.model.pretrained_model_name_or_path"),
+                    "model": {
+                        "dtype": row.get("config.model.dtype"),
+                        "revision": row.get("config.model.revision"),
+                        "_target_": row.get("config.model._target_"),
+                    },
+                    "metainfo": {
+                        "num_few_shots": row.get("config.metainfo.num_few_shots"),
+                        "version": row.get("config.metainfo.version"),
+                    },
+                    "pipeline_kwargs": {"add_special_tokens": row.get("config.pipeline_kwargs.add_special_tokens")},
+                },
+            }
+            eval_result = EvalResult.init_from_json_file(data)
+            eval_result.update_with_request_file(requests_path)
+            eval_name = eval_result.eval_name
+            if eval_name in eval_results:
+                eval_results[eval_name].results.update({k: v for k, v in eval_result.results.items() if v is not None})
+            else:
+                eval_results[eval_name] = eval_result
+    else:
+        # JSON
+        model_result_filepaths = []
+        for root, _, files in os.walk(results_path):
+            if len(files) == 0 or any([not f.endswith(".json") for f in files]):
+                continue
+            try:
+                files.sort(key=lambda x: x.removesuffix(".json").removeprefix("results_")[:-7])
+            except dateutil.parser._parser.ParserError:
+                files = [files[-1]]
+            for file in files:
+                model_result_filepaths.append(os.path.join(root, file))
+        eval_results = {}
+        for model_result_filepath in model_result_filepaths:
+            eval_result = EvalResult.init_from_json_file(model_result_filepath)
+            eval_result.update_with_request_file(requests_path)
+            eval_name = eval_result.eval_name
+            if eval_name in eval_results.keys():
+                eval_results[eval_name].results.update({k: v for k, v in eval_result.results.items() if v is not None})
+            else:
+                eval_results[eval_name] = eval_result
     results = []
     for v in eval_results.values():
             results.append(v)
         except KeyError:  # not all eval values present
             continue
     return results

src/populate.py CHANGED Viewed

@@ -14,7 +14,6 @@ def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchm
     all_data_json = [v.to_dict() for v in raw_data]
     df = pd.DataFrame.from_records(all_data_json)
     # Add a row ID column
     df[AutoEvalColumn.row_id.name] = range(len(df))

     all_data_json = [v.to_dict() for v in raw_data]
     df = pd.DataFrame.from_records(all_data_json)
     # Add a row ID column
     df[AutoEvalColumn.row_id.name] = range(len(df))