|
from functools import partial |
|
import json |
|
|
|
|
|
import gradio as gr |
|
|
|
|
|
import pandas as pd |
|
import numpy as np |
|
|
|
# Maps each dataset key (also used as the key of the `data` dict) to the
# human-readable name displayed as the title of its leaderboard tab.
DATASETS = dict(
    samsum="SAMSum",
    cnn="CNN/DailyMail",
    xsum="XSum",
    billsum="BillSum",
    multinews="Multi-News",
)
|
|
|
# Names of the summarization systems that appear on the leaderboard.
# The position of each name matters: MODEL_TO_TAXONOMY is aligned with this
# list element by element, and the per-dataset result tables refer to a model
# through its index here. Append new models at the end to keep indices stable.
MODELS = [
    "PEGASUS",              # 0
    "PEGASUS-X",            # 1
    "MTL-ABS",              # 2
    "BART SDPT/DAPT/TAPT",  # 3
    "Prefix-tuning",        # 4
    "ExtraPhrase",          # 5
    "Primera",              # 6
    "Se3",                  # 7
    "DADS",                 # 8
    "LML-LRS",              # 9
    "PSP",                  # 10
    "Athena",               # 11
    "SPEC",                 # 12
    "Z-Code++",             # 13
    "DIONYSUS",             # 14
    "COMPO",                # 15
    "UNISUMM",              # 16
    "Centrum",              # 17
    "ParaSum",              # 18
    "EFLRAS",               # 19
]
|
|
|
# Model name -> URL of its code repository or, when no repository is linked,
# of its paper. Keys must match the entries of MODELS exactly, because the
# "Model" column is turned into links by looking names up in this mapping.
REPOS_PAPERS = {
    "PEGASUS": "https://github.com/google-research/pegasus",
    "PEGASUS-X": "https://github.com/google-research/pegasus",
    "MTL-ABS": "https://github.com/YiSyuanChen/MTL-ABS",
    "BART SDPT/DAPT/TAPT": "https://github.com/TysonYu/AdaptSum",
    "Prefix-tuning": "https://github.com/XiangLi1999/PrefixTuning",
    "ExtraPhrase": "https://github.com/loem-ms/ExtraPhrase",
    "Primera": "https://github.com/allenai/PRIMER",
    "Se3": "https://ojs.aaai.org/index.php/AAAI/article/view/21357",
    "DADS": "https://aclanthology.org/2022.findings-naacl.53.pdf",
    "LML-LRS": "https://dl.acm.org/doi/pdf/10.1145/3477495.3531908",
    "PSP": "https://aclanthology.org/2022.coling-1.553.pdf",
    # Canonical article URL: the `casa_token` query string that used to be
    # appended here is a per-referral access token, not part of the permanent
    # link, and should not be published in source code.
    "Athena": "https://www.sciencedirect.com/science/article/pii/S0925231223004794",
    "SPEC": "https://github.com/YiSyuanChen/SPEC",
    "Z-Code++": "https://arxiv.org/pdf/2208.09770.pdf",
    "DIONYSUS": "https://arxiv.org/pdf/2212.10018.pdf",
    "COMPO": "https://github.com/ozyyshr/Compo",
    "UNISUMM": "https://github.com/microsoft/UniSumm",
    "Centrum": "https://github.com/ratishsp/centrum",
    "ParaSum": "https://link.springer.com/chapter/10.1007/978-3-031-40289-0_9",
    "EFLRAS": "https://github.com/NLPlab-skku/SummaryXAI-QA/tree/main/Low-Resource-Sum",
}
|
|
|
# Method categories used to classify the models. MODEL_TO_TAXONOMY picks
# entries from this list by index, so the order must not change.
TAXONOMY = [
    "Pre-training",                 # 0
    "Centroid-based pre-training",  # 1
    "Data augmentation",            # 2
    "Segmentation",                 # 3
    "Meta-learning",                # 4
    "Meta-transfer",                # 5
    "Extractive summarization",     # 6
    "Prefix tuning",                # 7
]
|
|
|
# Taxonomy category of every model, aligned position-by-position with MODELS
# (entry i describes MODELS[i]). Each number below is an index into TAXONOMY.
MODEL_TO_TAXONOMY = [
    TAXONOMY[category]
    for category in (
        0,  # PEGASUS
        0,  # PEGASUS-X
        5,  # MTL-ABS
        0,  # BART SDPT/DAPT/TAPT
        7,  # Prefix-tuning
        2,  # ExtraPhrase
        0,  # Primera
        3,  # Se3
        2,  # DADS
        4,  # LML-LRS
        0,  # PSP
        3,  # Athena
        5,  # SPEC
        0,  # Z-Code++
        0,  # DIONYSUS
        2,  # COMPO
        0,  # UNISUMM
        1,  # Centrum
        6,  # ParaSum
        5,  # EFLRAS
    )
]
|
|
|
# 2-D string array with one row per model: column 0 holds the model name,
# column 1 its taxonomy category. The result tables index into it by row.
model_tax = np.column_stack((MODELS, MODEL_TO_TAXONOMY))
|
|
|
# SAMSum results. Every compact tuple is
#   (row index into model_tax, additional info, training samples,
#    ROUGE-1, ROUGE-2, ROUGE-L)
# and is expanded into the 8-item row layout
#   [model, additional info, taxonomy, training samples, 0, R-1, R-2, R-L]
# where the constant 0 is a placeholder for the aggregate "ROUGE" column that
# is computed after the DataFrame is built.
SAMSUM_DATA = [
    [model_tax[idx][0], info, model_tax[idx][1], shots, 0, r1, r2, rl]
    for idx, info, shots, r1, r2, rl in (
        (14, "base", 0, 39.60, 15.40, 30.10),
        (14, "large", 0, 41.30, 16.20, 30.90),
        (3, "SDPT w/RecAdam", 300, 45.23, 19.43, 35.37),
        (3, "DAPT", 300, 41.22, 17.88, 32.40),
        (3, "TAPT w/RecAdam", 300, 41.34, 17.88, 32.31),
        (13, "large", 0, 26.50, 7.90, 20.50),
        (13, "large", 10, 40.27, 17.40, 33.70),
        (13, "large", 100, 47.60, 22.30, 38.70),
        (16, "", 0, 22.17, 6.88, 17.08),
        (16, "", 10, 43.89, 18.53, 34.76),
        (16, "", 100, 46.93, 20.65, 37.28),
        (8, "", 10, 32.50, 12.00, 27.00),
        (8, "", 100, 43.90, 19.70, 36.10),
        (15, "base, self-training", 147, 45.42, 21.23, 41.42),
        (15, "large, self-training", 147, 49.78, 24.65, 45.41),
        (15, "base, joint-training", 147, 44.89, 20.64, 40.58),
        (15, "large, joint-training", 147, 49.14, 23.45, 44.35),
        (12, "", 10, 46.06, 20.90, 40.34),
        (12, "", 100, 51.94, 24.75, 46.97),
    )
]
|
|
|
# CNN/DailyMail results. Every compact tuple is
#   (row index into model_tax, additional info, training samples,
#    ROUGE-1, ROUGE-2, ROUGE-L)
# and is expanded into the 8-item row layout
#   [model, additional info, taxonomy, training samples, 0, R-1, R-2, R-L]
# where the constant 0 is a placeholder for the aggregate "ROUGE" column that
# is computed after the DataFrame is built.
CNN_DATA = [
    [model_tax[idx][0], info, model_tax[idx][1], shots, 0, r1, r2, rl]
    for idx, info, shots, r1, r2, rl in (
        # NOTE(review): the 0-sample and 10-sample rows of model 13 carry
        # identical scores - confirm against the source paper.
        (13, "large", 0, 40.00, 17.30, 25.30),
        (13, "large", 10, 40.00, 17.30, 25.30),
        (13, "large", 100, 41.10, 18.40, 27.50),
        (0, "large", 0, 32.90, 13.28, 29.38),
        (0, "large", 10, 37.25, 15.84, 33.49),
        (0, "large", 100, 40.28, 18.21, 37.03),
        (1, "large", 0, 30.22, 11.88, 28.31),
        (1, "large", 10, 36.12, 13.70, 30.26),
        (1, "large", 100, 38.40, 17.02, 36.75),
        (10, "", 300, 38.31, 15.94, 25.41),
        (5, "", 1000, 34.47, 12.91, 31.36),
        (9, "", 10, 39.34, 16.53, 25.40),
        (9, "", 100, 39.94, 16.96, 26.09),
        (19, "", 10, 39.50, 16.80, 25.72),
        (19, "", 100, 40.53, 17.61, 26.64),
        (18, "", 200, 40.81, 17.78, 36.94),
    )
]
|
|
|
# BillSum results. Every compact tuple is
#   (row index into model_tax, additional info, training samples,
#    ROUGE-1, ROUGE-2, ROUGE-L)
# and is expanded into the 8-item row layout
#   [model, additional info, taxonomy, training samples, 0, R-1, R-2, R-L]
# where the constant 0 is a placeholder for the aggregate "ROUGE" column that
# is computed after the DataFrame is built.
#
# Deriving both the name (column 0 of model_tax) and the taxonomy (column 1)
# from the same index fixes the first three rows, which previously repeated
# the model name in the taxonomy slot.
BILLSUM_DATA = [
    [model_tax[idx][0], info, model_tax[idx][1], shots, 0, r1, r2, rl]
    for idx, info, shots, r1, r2, rl in (
        (0, "large", 0, 41.02, 17.44, 25.24),
        (0, "large", 10, 40.48, 18.49, 27.27),
        (0, "large", 100, 44.78, 26.40, 34.40),
        (1, "large", 0, 41.32, 18.04, 25.11),
        (1, "large", 10, 42.55, 18.97, 26.92),
        (1, "large", 100, 46.48, 27.77, 36.53),
        (7, "LED base(512) w/Se3", 10, 46.94, 23.04, 29.29),
        (7, "LED base(512) w/Se3", 100, 50.4, 27.73, 33.74),
        (11, "", 10, 47.57, 24.14, 30.35),
        (11, "", 100, 51.59, 29.36, 35.04),
        # NOTE(review): the rows of model 9 and model 19 (further down) carry
        # identical scores - confirm against the source papers.
        (9, "", 10, 46.64, 25.07, 30.90),
        (9, "", 100, 48.18, 27.18, 33.28),
        (2, "", 10, 41.22, 18.61, 26.33),
        (2, "", 100, 45.29, 22.74, 29.56),
        (19, "", 10, 46.64, 25.07, 30.90),
        (19, "", 100, 48.18, 27.18, 33.28),
    )
]
|
|
|
# XSum results. Every compact tuple is
#   (row index into model_tax, additional info, training samples,
#    ROUGE-1, ROUGE-2, ROUGE-L)
# and is expanded into the 8-item row layout
#   [model, additional info, taxonomy, training samples, 0, R-1, R-2, R-L]
# where the constant 0 is a placeholder for the aggregate "ROUGE" column that
# is computed after the DataFrame is built.
XSUM_DATA = [
    [model_tax[idx][0], info, model_tax[idx][1], shots, 0, r1, r2, rl]
    for idx, info, shots, r1, r2, rl in (
        (0, "large", 0, 19.27, 3.00, 12.72),
        (0, "large", 10, 19.39, 3.45, 14.02),
        (0, "large", 100, 39.07, 16.44, 31.27),
        (10, "", 300, 32.86, 11.27, 25.64),
        (16, "", 0, 20.72, 3.62, 16.56),
        (16, "", 10, 26.10, 7.20, 19.92),
        (16, "", 100, 33.33, 11.36, 25.85),
        (9, "", 10, 32.35, 11.86, 25.33),
        (9, "", 100, 35.54, 13.94, 27.79),
        (19, "", 10, 32.65, 12.10, 25.82),
        (19, "", 100, 36.51, 14.55, 29.01),
        (12, "", 10, 32.74, 10.90, 24.86),
        (12, "", 100, 35.69, 12.88, 27.25),
        (18, "", 1000, 21.15, 3.08, 15.91),
        (4, "", 100, 35.20, 13.30, 28.10),
    )
]
|
|
|
# Multi-News results. Every compact tuple is
#   (row index into model_tax, additional info, training samples,
#    ROUGE-1, ROUGE-2, ROUGE-L)
# and is expanded into the 8-item row layout
#   [model, additional info, taxonomy, training samples, 0, R-1, R-2, R-L]
# where the constant 0 is a placeholder for the aggregate "ROUGE" column that
# is computed after the DataFrame is built.
MN_DATA = [
    [model_tax[idx][0], info, model_tax[idx][1], shots, 0, r1, r2, rl]
    for idx, info, shots, r1, r2, rl in (
        (0, "large", 0, 36.54, 10.52, 18.67),
        (0, "large", 10, 39.79, 12.56, 20.06),
        (0, "large", 100, 41.04, 13.88, 21.52),
        (6, "", 0, 39.09, 13.91, 19.19),
        (6, "", 10, 44.02, 15.54, 22.03),
        (6, "", 100, 46.01, 16.76, 22.91),
        (17, "", 0, 43.5, 15.7, 22.4),
        (17, "", 10, 43.4, 16.6, 22.2),
        (17, "", 100, 45.7, 16.8, 23.2),
        (19, "", 10, 43.60, 14.85, 20.70),
        (19, "", 100, 45.55, 16.01, 22.12),
        (2, "", 10, 38.88, 12.78, 19.88),
        (2, "", 100, 39.64, 13.64, 20.45),
    )
]
|
|
|
# Column headers of the displayed leaderboard, left to right. The first entry
# ("Rank") is inserted after sorting; the remaining eight name the items of
# each raw result row, in order.
COL_NAMES = [
    "Rank",
    "Model", "Additional info", "Taxonomy", "Training samples",
    "ROUGE", "ROUGE-1", "ROUGE-2", "ROUGE-L",
]
|
|
|
# One DataFrame per dataset key, built from the raw result rows. The insertion
# order below is the order in which the dict is later iterated.
data = {
    key: pd.DataFrame(rows)
    for key, rows in (
        ("samsum", SAMSUM_DATA),
        ("cnn", CNN_DATA),
        ("billsum", BILLSUM_DATA),
        ("xsum", XSUM_DATA),
        ("multinews", MN_DATA),
    )
}
|
|
|
def make_clickable(text: str, url: str) -> str:
    """Return *text* as an underlined Markdown link pointing at *url*.

    The result has the form ``<u>[text](url)</u>``; neither argument is
    escaped or validated.
    """
    return f"<u>[{text}]({url})</u>"
|
|
|
# Finalise every leaderboard table in place: name the columns, compute the
# aggregate score, rank the rows and turn the model names into links.
for dataset in data:
    board = data[dataset]

    # The raw rows hold every column except the leading "Rank".
    board.columns = COL_NAMES[1:]

    # "ROUGE" is the row-wise mean of the three ROUGE variants, rounded to two
    # decimals; it overwrites the placeholder value stored in the raw rows.
    rouge_parts = board[["ROUGE-1", "ROUGE-2", "ROUGE-L"]]
    board["ROUGE"] = rouge_parts.mean(axis=1).round(2)

    # Highest aggregate score first, then number the rows 1..n in that order.
    board.sort_values("ROUGE", ascending=False, inplace=True)
    board.insert(0, COL_NAMES[0], range(1, 1 + len(board)))

    # Replace each plain model name by a Markdown link to its repo/paper.
    board["Model"] = board["Model"].apply(
        lambda name: make_clickable(name, REPOS_PAPERS[name])
    )
    # NOTE(review): looks like leftover debug output - consider removing.
    print(board["Model"])
|
|
|
|
|
|
|
# Summary counters shown in the page header.
NUM_DATASETS = len(DATASETS)  # dict keys are unique already
NUM_MODELS = len({*MODELS})   # number of distinct model names
|
|
|
|
|
|
|
|
|
|
|
# Custom CSS handed to gr.Blocks: lets table header text wrap, sets the
# `--cell-width-1` custom property on tables to 210px, and makes the content
# of the second table column scroll horizontally when it overflows.
css = """
table > thead {
    white-space: normal
}
table {
    --cell-width-1: 210px
}
table > tbody > tr > td:nth-child(2) > div {
    overflow-x: auto
}
"""

# Page layout: an introductory Markdown header followed by one tab per
# dataset, each holding a short legend and the corresponding leaderboard table.
block = gr.Blocks(css=css)
with block:
    gr.Markdown(f"""
This is a leaderboard for Few-Shot Summarization (FSS).

- **Total Datasets**: {NUM_DATASETS}
- **Total Models**: {NUM_MODELS}
- **Metric**: ROUGE Score

For more information about the metrics and models employed and to gain a greater understanding of the general taxonomy of FSS, please refer to our [Survey on FSS](the paper will be published soon 🤗).
""")

    with gr.Tabs():
        for dataset in data:
            dataset_name = DATASETS[dataset]
            with gr.TabItem(dataset_name):
                with gr.Row():
                    gr.Markdown(f"""
**{dataset_name}** leaderboard
- **ROUGE** is the average of ROUGE-1, ROUGE-2 and ROUGE-L
- **RANK** is defined following ROUGE column values
""")
                with gr.Row():
                    data_classification = gr.components.Dataframe(
                        data[dataset],
                        # One entry per displayed column, in COL_NAMES order.
                        # The list previously had only eight entries for the
                        # nine columns (it omitted "Rank"), which shifted
                        # every type one column to the left.
                        datatype=[
                            "number",    # Rank
                            "markdown",  # Model (rendered as a link)
                            "markdown",  # Additional info
                            "markdown",  # Taxonomy
                            "number",    # Training samples
                            "number",    # ROUGE
                            "number",    # ROUGE-1
                            "number",    # ROUGE-2
                            "number",    # ROUGE-L
                        ],
                        type="pandas",
                    )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Enable request queuing (at most 10 pending requests) and start the app.
block.queue(max_size=10).launch()