Anonym Submission
committed on
Create app.py
app.py
ADDED
@@ -0,0 +1,296 @@
from functools import partial
import json

# from datasets import load_dataset
import gradio as gr
# from huggingface_hub import get_hf_file_metadata, HfApi, hf_hub_download, hf_hub_url
# from huggingface_hub.repocard import metadata_load
import pandas as pd
import numpy as np

DATASETS = {
    "samsum": "SAMSum",
    "cnn": "CNN/DailyMail",
    "xsum": "XSum",
    "billsum": "BillSum",
    "multinews": "Multi-News",
}

MODELS = [
    "PEGASUS",  #0
    "PEGASUS-X",  #1
    "MTL-ABS",  #2
    "BART SDPT/DAPT/TAPT",  #3
    "Prefix-tuning",  #4
    "ExtraPhrase",  #5
    "Primera",  #6
    "Se3",  #7
    "DADS",  #8
    "LML-LRS",  #9
    "PSP",  #10
    "Athena",  #11
    "SPEC",  #12
    "Z-Code++",  #13
    "DIONYSUS",  #14
    "COMPO",  #15
    "UNISUMM",  #16
    "Centrum",  #17
    "ParaSum",  #18
    "EFLRAS",  #19
]
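# The position comments (#0-#19) are repeated in REPOS_PAPERS and
# MODEL_TO_TAXONOMY below; the result tables reference model N as model_tax[N].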

REPOS_PAPERS = {
    "PEGASUS": "https://github.com/google-research/pegasus",  #0
    "PEGASUS-X": "https://github.com/google-research/pegasus",  #1
    "MTL-ABS": "https://github.com/YiSyuanChen/MTL-ABS",  #2
    "BART SDPT/DAPT/TAPT": "https://github.com/TysonYu/AdaptSum",  #3
    "Prefix-tuning": "https://github.com/XiangLi1999/PrefixTuning",  #4
    "ExtraPhrase": "https://github.com/loem-ms/ExtraPhrase",  #5
    "Primera": "https://github.com/allenai/PRIMER",  #6
    "Se3": "https://ojs.aaai.org/index.php/AAAI/article/view/21357",  #7
    "DADS": "https://aclanthology.org/2022.findings-naacl.53.pdf",  #8
    "LML-LRS": "https://dl.acm.org/doi/pdf/10.1145/3477495.3531908",  #9
    "PSP": "https://aclanthology.org/2022.coling-1.553.pdf",  #10
    "Athena": "https://www.sciencedirect.com/science/article/pii/S0925231223004794?casa_token=ptLMl-LZLbQAAAAA:9Aq7HEUf6dRrIg5MTj4hZm2eaWJSeTDKmnXxS52fkZ131ejkYHdZgGimL0TFCFXy57qF1k9KTKE",  #11
    "SPEC": "https://github.com/YiSyuanChen/SPEC",  #12
    "Z-Code++": "https://arxiv.org/pdf/2208.09770.pdf",  #13
    "DIONYSUS": "https://arxiv.org/pdf/2212.10018.pdf",  #14
    "COMPO": "https://github.com/ozyyshr/Compo",  #15
    "UNISUMM": "https://github.com/microsoft/UniSumm",  #16
    "Centrum": "https://github.com/ratishsp/centrum",  #17
    "ParaSum": "https://link.springer.com/chapter/10.1007/978-3-031-40289-0_9",  #18
    "EFLRAS": "https://github.com/NLPlab-skku/SummaryXAI-QA/tree/main/Low-Resource-Sum",  #19
}

TAXONOMY = [
    "Pre-training",  #0
    "Centroid-based pre-training",  #1
    "Data augmentation",  #2
    "Segmentation",  #3
    "Meta-learning",  #4
    "Meta-transfer",  #5
    "Extractive summarization",  #6
    "Prefix tuning",  #7
]

MODEL_TO_TAXONOMY = [
    TAXONOMY[0],  # PEGASUS
    TAXONOMY[0],  # PEGASUS-X
    TAXONOMY[5],  # MTL-ABS
    TAXONOMY[0],  # BART SDPT/DAPT/TAPT
    TAXONOMY[7],  # Prefix-tuning
    TAXONOMY[2],  # ExtraPhrase
    TAXONOMY[0],  # Primera
    TAXONOMY[3],  # Se3
    TAXONOMY[2],  # DADS
    TAXONOMY[4],  # LML-LRS
    TAXONOMY[0],  # PSP
    TAXONOMY[3],  # Athena
    TAXONOMY[5],  # SPEC
    TAXONOMY[0],  # Z-Code++
    TAXONOMY[0],  # DIONYSUS
    TAXONOMY[2],  # COMPO
    TAXONOMY[0],  # UNISUMM
    TAXONOMY[1],  # Centrum
    TAXONOMY[6],  # ParaSum
    TAXONOMY[5],  # EFLRAS
]

model_tax = np.array([MODELS, MODEL_TO_TAXONOMY]).transpose()

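# model_tax[N] pairs MODELS[N] with its taxonomy label: model_tax[N][0] is the
# model name, model_tax[N][1] its taxonomy entry.
# Each row of the *_DATA tables below is
# [Model, Additional info, Taxonomy, Training samples, ROUGE placeholder,
#  ROUGE-1, ROUGE-2, ROUGE-L]; the placeholder is overwritten later with the
# rounded mean of the three ROUGE scores.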
SAMSUM_DATA = [
    [model_tax[14][0], "base", model_tax[14][1], 0, 0, 39.60, 15.40, 30.10],
    [model_tax[14][0], "large", model_tax[14][1], 0, 0, 41.30, 16.20, 30.90],
    [model_tax[3][0], "SDPT w/RecAdam", model_tax[3][1], 300, 0, 45.23, 19.43, 35.37],
    [model_tax[3][0], "DAPT", model_tax[3][1], 300, 0, 41.22, 17.88, 32.40],
    [model_tax[3][0], "TAPT w/RecAdam", model_tax[3][1], 300, 0, 41.34, 17.88, 32.31],
    [model_tax[13][0], "large", model_tax[13][1], 0, 0, 26.50, 7.90, 20.50],
    [model_tax[13][0], "large", model_tax[13][1], 10, 0, 40.27, 17.40, 33.70],
    [model_tax[13][0], "large", model_tax[13][1], 100, 0, 47.60, 22.30, 38.70],
    [model_tax[16][0], "", model_tax[16][1], 0, 0, 22.17, 6.88, 17.08],
    [model_tax[16][0], "", model_tax[16][1], 10, 0, 43.89, 18.53, 34.76],
    [model_tax[16][0], "", model_tax[16][1], 100, 0, 46.93, 20.65, 37.28],
    [model_tax[8][0], "", model_tax[8][1], 10, 0, 32.50, 12.00, 27.00],
    [model_tax[8][0], "", model_tax[8][1], 100, 0, 43.90, 19.70, 36.10],
    [model_tax[15][0], "base, self-training", model_tax[15][1], 147, 0, 45.42, 21.23, 41.42],
    [model_tax[15][0], "large, self-training", model_tax[15][1], 147, 0, 49.78, 24.65, 45.41],
    [model_tax[15][0], "base, joint-training", model_tax[15][1], 147, 0, 44.89, 20.64, 40.58],
    [model_tax[15][0], "large, joint-training", model_tax[15][1], 147, 0, 49.14, 23.45, 44.35],
    [model_tax[12][0], "", model_tax[12][1], 10, 0, 46.06, 20.90, 40.34],
    [model_tax[12][0], "", model_tax[12][1], 100, 0, 51.94, 24.75, 46.97],
]

CNN_DATA = [
    [model_tax[13][0], "large", model_tax[13][1], 0, 0, 40.00, 17.30, 25.30],
    [model_tax[13][0], "large", model_tax[13][1], 10, 0, 40.00, 17.30, 25.30],
    [model_tax[13][0], "large", model_tax[13][1], 100, 0, 41.10, 18.40, 27.50],
    [model_tax[0][0], "large", model_tax[0][1], 0, 0, 32.90, 13.28, 29.38],
    [model_tax[0][0], "large", model_tax[0][1], 10, 0, 37.25, 15.84, 33.49],
    [model_tax[0][0], "large", model_tax[0][1], 100, 0, 40.28, 18.21, 37.03],
    [model_tax[1][0], "large", model_tax[1][1], 0, 0, 30.22, 11.88, 28.31],
    [model_tax[1][0], "large", model_tax[1][1], 10, 0, 36.12, 13.70, 30.26],
    [model_tax[1][0], "large", model_tax[1][1], 100, 0, 38.40, 17.02, 36.75],
    [model_tax[10][0], "", model_tax[10][1], 300, 0, 38.31, 15.94, 25.41],
    [model_tax[5][0], "", model_tax[5][1], 1000, 0, 34.47, 12.91, 31.36],
    [model_tax[9][0], "", model_tax[9][1], 10, 0, 39.34, 16.53, 25.40],
    [model_tax[9][0], "", model_tax[9][1], 100, 0, 39.94, 16.96, 26.09],
    [model_tax[19][0], "", model_tax[19][1], 10, 0, 39.50, 16.80, 25.72],
    [model_tax[19][0], "", model_tax[19][1], 100, 0, 40.53, 17.61, 26.64],
    [model_tax[18][0], "", model_tax[18][1], 200, 0, 40.81, 17.78, 36.94],
]

BILLSUM_DATA = [
    [model_tax[0][0], "large", model_tax[0][1], 0, 0, 41.02, 17.44, 25.24],
    [model_tax[0][0], "large", model_tax[0][1], 10, 0, 40.48, 18.49, 27.27],
    [model_tax[0][0], "large", model_tax[0][1], 100, 0, 44.78, 26.40, 34.40],
    [model_tax[1][0], "large", model_tax[1][1], 0, 0, 41.32, 18.04, 25.11],
    [model_tax[1][0], "large", model_tax[1][1], 10, 0, 42.55, 18.97, 26.92],
    [model_tax[1][0], "large", model_tax[1][1], 100, 0, 46.48, 27.77, 36.53],
    [model_tax[7][0], "LED base(512) w/Se3", model_tax[7][1], 10, 0, 46.94, 23.04, 29.29],
    [model_tax[7][0], "LED base(512) w/Se3", model_tax[7][1], 100, 0, 50.4, 27.73, 33.74],
    [model_tax[11][0], "", model_tax[11][1], 10, 0, 47.57, 24.14, 30.35],
    [model_tax[11][0], "", model_tax[11][1], 100, 0, 51.59, 29.36, 35.04],
    [model_tax[9][0], "", model_tax[9][1], 10, 0, 46.64, 25.07, 30.90],
    [model_tax[9][0], "", model_tax[9][1], 100, 0, 48.18, 27.18, 33.28],
    [model_tax[2][0], "", model_tax[2][1], 10, 0, 41.22, 18.61, 26.33],
    [model_tax[2][0], "", model_tax[2][1], 100, 0, 45.29, 22.74, 29.56],
    [model_tax[19][0], "", model_tax[19][1], 10, 0, 46.64, 25.07, 30.90],
    [model_tax[19][0], "", model_tax[19][1], 100, 0, 48.18, 27.18, 33.28],
]

XSUM_DATA = [
    [model_tax[0][0], "large", model_tax[0][1], 0, 0, 19.27, 3.00, 12.72],
    [model_tax[0][0], "large", model_tax[0][1], 10, 0, 19.39, 3.45, 14.02],
    [model_tax[0][0], "large", model_tax[0][1], 100, 0, 39.07, 16.44, 31.27],
    [model_tax[10][0], "", model_tax[10][1], 300, 0, 32.86, 11.27, 25.64],
    [model_tax[16][0], "", model_tax[16][1], 0, 0, 20.72, 3.62, 16.56],
    [model_tax[16][0], "", model_tax[16][1], 10, 0, 26.10, 7.20, 19.92],
    [model_tax[16][0], "", model_tax[16][1], 100, 0, 33.33, 11.36, 25.85],
    [model_tax[9][0], "", model_tax[9][1], 10, 0, 32.35, 11.86, 25.33],
    [model_tax[9][0], "", model_tax[9][1], 100, 0, 35.54, 13.94, 27.79],
    [model_tax[19][0], "", model_tax[19][1], 10, 0, 32.65, 12.10, 25.82],
    [model_tax[19][0], "", model_tax[19][1], 100, 0, 36.51, 14.55, 29.01],
    [model_tax[12][0], "", model_tax[12][1], 10, 0, 32.74, 10.90, 24.86],
    [model_tax[12][0], "", model_tax[12][1], 100, 0, 35.69, 12.88, 27.25],
    [model_tax[18][0], "", model_tax[18][1], 1000, 0, 21.15, 3.08, 15.91],
    [model_tax[4][0], "", model_tax[4][1], 100, 0, 35.20, 13.30, 28.10],
]

MN_DATA = [
    [model_tax[0][0], "large", model_tax[0][1], 0, 0, 36.54, 10.52, 18.67],
    [model_tax[0][0], "large", model_tax[0][1], 10, 0, 39.79, 12.56, 20.06],
    [model_tax[0][0], "large", model_tax[0][1], 100, 0, 41.04, 13.88, 21.52],
    [model_tax[6][0], "", model_tax[6][1], 0, 0, 39.09, 13.91, 19.19],
    [model_tax[6][0], "", model_tax[6][1], 10, 0, 44.02, 15.54, 22.03],
    [model_tax[6][0], "", model_tax[6][1], 100, 0, 46.01, 16.76, 22.91],
    [model_tax[17][0], "", model_tax[17][1], 0, 0, 43.5, 15.7, 22.4],
    [model_tax[17][0], "", model_tax[17][1], 10, 0, 43.4, 16.6, 22.2],
    [model_tax[17][0], "", model_tax[17][1], 100, 0, 45.7, 16.8, 23.2],
    [model_tax[19][0], "", model_tax[19][1], 10, 0, 43.60, 14.85, 20.70],
    [model_tax[19][0], "", model_tax[19][1], 100, 0, 45.55, 16.01, 22.12],
    [model_tax[2][0], "", model_tax[2][1], 10, 0, 38.88, 12.78, 19.88],
    [model_tax[2][0], "", model_tax[2][1], 100, 0, 39.64, 13.64, 20.45],
]

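# Column headers for the leaderboards; COL_NAMES[0] ("Rank") is added after
# sorting, while COL_NAMES[1:] name the raw row fields defined above.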
COL_NAMES = [
    "Rank",
    "Model",
    "Additional info",
    "Taxonomy",
    "Training samples",
    "ROUGE",
    "ROUGE-1",
    "ROUGE-2",
    "ROUGE-L",
]

data = {
    "samsum": pd.DataFrame(SAMSUM_DATA),
    "cnn": pd.DataFrame(CNN_DATA),
    "billsum": pd.DataFrame(BILLSUM_DATA),
    "xsum": pd.DataFrame(XSUM_DATA),
    "multinews": pd.DataFrame(MN_DATA),
}

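# Post-processing for every leaderboard DataFrame: name the columns, compute
# ROUGE as the rounded mean of ROUGE-1/2/L, sort by it (so Rank 1 is the best
# average ROUGE), insert the Rank column, and turn each model name into a
# Markdown link to its repo/paper via make_clickable.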
def make_clickable(text, url):
    return "<u>[{}]({})</u>".format(text, url)

for dataset in data:
    data[dataset].columns = COL_NAMES[1:]
    data[dataset]["ROUGE"] = np.around(np.mean(data[dataset][["ROUGE-1", "ROUGE-2", "ROUGE-L"]], axis=1), decimals=2)
    data[dataset].sort_values("ROUGE", ascending=False, inplace=True)  # to default sort by ROUGE
    # Add Rank column
    data[dataset].insert(0, COL_NAMES[0], range(1, 1 + len(data[dataset])))
    # Add link to papers/repos
    data[dataset]["Model"] = data[dataset]["Model"].apply(lambda x: make_clickable(x, REPOS_PAPERS[x]))
    print(data[dataset]["Model"])
    # data[dataset].drop("ROUGE", axis=1, inplace=True)


NUM_DATASETS = len(set(DATASETS))
NUM_MODELS = len(set(MODELS))


# 1. Force headers to wrap
# 2. Force model column (maximum) width
# 3. Prevent model column from overflowing, scroll instead
css = """
table > thead {
    white-space: normal
}
table {
    --cell-width-1: 210px
}
table > tbody > tr > td:nth-child(2) > div {
    overflow-x: auto
}
"""

block = gr.Blocks(css=css)
with block:
    gr.Markdown(f"""
    This is a leaderboard for Few-Shot Summarization (FSS).

    - **Total Datasets**: {NUM_DATASETS}
    - **Total Models**: {NUM_MODELS}
    - **Metric**: ROUGE Score

    For more information about the metrics and models, and for an overview of the general taxonomy of FSS, please refer to our [Survey on FSS](the paper will be published soon 🤗).
    """)

    with gr.Tabs():
        for dataset in data:
            dataset_name = DATASETS[dataset]
            with gr.TabItem(dataset_name):
                with gr.Row():
                    gr.Markdown(f"""
                    **{dataset_name}** leaderboard
                    - **ROUGE** is the average of ROUGE-1, ROUGE-2 and ROUGE-L
                    - **Rank** is assigned by sorting on the ROUGE column (rank 1 = highest average ROUGE)
                    """)
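                # One datatype per displayed column (Rank, Model, Additional info,
                # Taxonomy, Training samples, ROUGE, ROUGE-1, ROUGE-2, ROUGE-L);
                # "markdown" on the Model column renders the link as clickable.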
                with gr.Row():
                    data_classification = gr.components.Dataframe(
                        data[dataset],
                        datatype=["number", "markdown", "markdown", "markdown", "number", "number", "number", "number", "number"],
                        type="pandas",
                    )

# gr.Markdown(r"""

# Made with ❤️ for NLP. If this work is useful to you, please consider citing:

# ```bibtex
# @article{muennighoff2022mteb,
#     doi = {10.48550/ARXIV.2210.07316},
#     url = {https://arxiv.org/abs/2210.07316},
#     author = {Qui, Quo, Qua},
#     title = {Survey on Low Resource Summarization},
#     publisher = {arXiv},
#     journal = {arXiv preprint arXiv:2210.07316},
#     year = {2024}
# }
# ```
# """)

block.queue(max_size=10)
block.launch()