Anonym Submission committed on
Commit
73863f3
·
verified ·
1 Parent(s): 861d11d

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +296 -0
app.py ADDED
@@ -0,0 +1,296 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from functools import partial
2
+ import json
3
+
4
+ # from datasets import load_dataset
5
+ import gradio as gr
6
+ # from huggingface_hub import get_hf_file_metadata, HfApi, hf_hub_download, hf_hub_url
7
+ # from huggingface_hub.repocard import metadata_load
8
+ import pandas as pd
9
+ import numpy as np
10
+
11
# Internal dataset key -> display name shown on each leaderboard tab.
DATASETS = dict(
    samsum="SAMSum",
    cnn="CNN/DailyMail",
    xsum="XSum",
    billsum="BillSum",
    multinews="Multi-News",
)
18
+
19
# All models tracked by the leaderboard. Order matters: positions here are
# the indices used by MODEL_TO_TAXONOMY and by the model_tax lookup table.
MODELS = [
    "PEGASUS",              # 0
    "PEGASUS-X",            # 1
    "MTL-ABS",              # 2
    "BART SDPT/DAPT/TAPT",  # 3
    "Prefix-tuning",        # 4
    "ExtraPhrase",          # 5
    "Primera",              # 6
    "Se3",                  # 7
    "DADS",                 # 8
    "LML-LRS",              # 9
    "PSP",                  # 10
    "Athena",               # 11
    "SPEC",                 # 12
    "Z-Code++",             # 13
    "DIONYSUS",             # 14
    "COMPO",                # 15
    "UNISUMM",              # 16
    "Centrum",              # 17
    "ParaSum",              # 18
    "EFLRAS",               # 19
]
41
+
42
# Model name -> paper or repository URL, used to render each model name as a
# clickable markdown link. Keys must cover every entry of MODELS.
# Fix: the Athena URL previously ended with a stray zero-width space (U+200B)
# pasted in from the browser, which produced a broken link.
REPOS_PAPERS = {
    "PEGASUS": "https://github.com/google-research/pegasus",  # 0
    "PEGASUS-X": "https://github.com/google-research/pegasus",  # 1
    "MTL-ABS": "https://github.com/YiSyuanChen/MTL-ABS",  # 2
    "BART SDPT/DAPT/TAPT": "https://github.com/TysonYu/AdaptSum",  # 3
    "Prefix-tuning": "https://github.com/XiangLi1999/PrefixTuning",  # 4
    "ExtraPhrase": "https://github.com/loem-ms/ExtraPhrase",  # 5
    "Primera": "https://github.com/allenai/PRIMER",  # 6
    "Se3": "https://ojs.aaai.org/index.php/AAAI/article/view/21357",  # 7
    "DADS": "https://aclanthology.org/2022.findings-naacl.53.pdf",  # 8
    "LML-LRS": "https://dl.acm.org/doi/pdf/10.1145/3477495.3531908",  # 9
    "PSP": "https://aclanthology.org/2022.coling-1.553.pdf",  # 10
    "Athena": "https://www.sciencedirect.com/science/article/pii/S0925231223004794?casa_token=ptLMl-LZLbQAAAAA:9Aq7HEUf6dRrIg5MTj4hZm2eaWJSeTDKmnXxS52fkZ131ejkYHdZgGimL0TFCFXy57qF1k9KTKE",  # 11
    "SPEC": "https://github.com/YiSyuanChen/SPEC",  # 12
    "Z-Code++": "https://arxiv.org/pdf/2208.09770.pdf",  # 13
    "DIONYSUS": "https://arxiv.org/pdf/2212.10018.pdf",  # 14
    "COMPO": "https://github.com/ozyyshr/Compo",  # 15
    "UNISUMM": "https://github.com/microsoft/UniSumm",  # 16
    "Centrum": "https://github.com/ratishsp/centrum",  # 17
    "ParaSum": "https://link.springer.com/chapter/10.1007/978-3-031-40289-0_9",  # 18
    "EFLRAS": "https://github.com/NLPlab-skku/SummaryXAI-QA/tree/main/Low-Resource-Sum",  # 19
}
64
+
65
# The taxonomy of few-shot summarization approaches; entries are referenced
# by index from MODEL_TO_TAXONOMY.
TAXONOMY = [
    "Pre-training",                  # 0
    "Centroid-based pre-training",   # 1
    "Data augmentation",             # 2
    "Segmentation",                  # 3
    "Meta-learning",                 # 4
    "Meta-transfer",                 # 5
    "Extractive summarization",      # 6
    "Prefix tuning",                 # 7
]
75
+
76
# Taxonomy index for each model, aligned position-by-position with MODELS.
_TAXONOMY_INDEX_PER_MODEL = [0, 0, 5, 0, 7, 2, 0, 3, 2, 4, 0, 3, 5, 0, 0, 2, 0, 1, 6, 5]

# Taxonomy label for each model (same order as MODELS).
MODEL_TO_TAXONOMY = [TAXONOMY[i] for i in _TAXONOMY_INDEX_PER_MODEL]

# 20x2 string array pairing each model with its taxonomy label:
# model_tax[i][0] == MODELS[i], model_tax[i][1] == MODEL_TO_TAXONOMY[i].
model_tax = np.stack((MODELS, MODEL_TO_TAXONOMY), axis=1)
100
+
101
# SAMSum leaderboard rows. Each row follows COL_NAMES[1:]:
#   [Model, Additional info, Taxonomy, Training samples,
#    ROUGE (0 placeholder — recomputed later as the mean of R-1/R-2/R-L),
#    ROUGE-1, ROUGE-2, ROUGE-L]
SAMSUM_DATA = [
    [model_tax[14][0], "base", model_tax[14][1], 0, 0, 39.60, 15.40, 30.10],
    [model_tax[14][0], "large", model_tax[14][1], 0, 0, 41.30, 16.20, 30.90],
    [model_tax[3][0], "SDPT w/RecAdam", model_tax[3][1], 300, 0, 45.23, 19.43, 35.37],
    [model_tax[3][0], "DAPT", model_tax[3][1], 300, 0, 41.22, 17.88, 32.40],
    [model_tax[3][0], "TAPT w/RecAdam", model_tax[3][1], 300, 0, 41.34, 17.88, 32.31],
    [model_tax[13][0], "large", model_tax[13][1], 0, 0, 26.50, 7.90, 20.50],
    [model_tax[13][0], "large", model_tax[13][1], 10, 0, 40.27, 17.40, 33.70],
    [model_tax[13][0], "large", model_tax[13][1], 100, 0, 47.60, 22.30, 38.70],
    [model_tax[16][0], "", model_tax[16][1], 0, 0, 22.17, 6.88, 17.08],
    [model_tax[16][0], "", model_tax[16][1], 10, 0, 43.89, 18.53, 34.76],
    [model_tax[16][0], "", model_tax[16][1], 100, 0, 46.93, 20.65, 37.28],
    [model_tax[8][0], "", model_tax[8][1], 10, 0, 32.50, 12.00, 27.00],
    [model_tax[8][0], "", model_tax[8][1], 100, 0, 43.90, 19.70, 36.10],
    [model_tax[15][0], "base, self-training", model_tax[15][1], 147, 0, 45.42, 21.23, 41.42],
    [model_tax[15][0], "large, self-training", model_tax[15][1], 147, 0, 49.78, 24.65, 45.41],
    [model_tax[15][0], "base, joint-training", model_tax[15][1], 147, 0, 44.89, 20.64, 40.58],
    [model_tax[15][0], "large, joint-training", model_tax[15][1], 147, 0, 49.14, 23.45, 44.35],
    [model_tax[12][0], "", model_tax[12][1], 10, 0, 46.06, 20.90, 40.34],
    [model_tax[12][0], "", model_tax[12][1], 100, 0, 51.94, 24.75, 46.97],
]
122
+
123
# CNN/DailyMail leaderboard rows. Same row layout as the other *_DATA tables:
#   [Model, Additional info, Taxonomy, Training samples,
#    ROUGE placeholder (recomputed later), ROUGE-1, ROUGE-2, ROUGE-L]
CNN_DATA = [
    [model_tax[13][0], "large", model_tax[13][1], 0, 0, 40.00, 17.30, 25.30],
    [model_tax[13][0], "large", model_tax[13][1], 10, 0, 40.00, 17.30, 25.30],
    [model_tax[13][0], "large", model_tax[13][1], 100, 0, 41.10, 18.40, 27.50],
    [model_tax[0][0], "large", model_tax[0][1], 0, 0, 32.90, 13.28, 29.38],
    [model_tax[0][0], "large", model_tax[0][1], 10, 0, 37.25, 15.84, 33.49],
    [model_tax[0][0], "large", model_tax[0][1], 100, 0, 40.28, 18.21, 37.03],
    [model_tax[1][0], "large", model_tax[1][1], 0, 0, 30.22, 11.88, 28.31],
    [model_tax[1][0], "large", model_tax[1][1], 10, 0, 36.12, 13.70, 30.26],
    [model_tax[1][0], "large", model_tax[1][1], 100, 0, 38.40, 17.02, 36.75],
    [model_tax[10][0], "", model_tax[10][1], 300, 0, 38.31, 15.94, 25.41],
    [model_tax[5][0], "", model_tax[5][1], 1000, 0, 34.47, 12.91, 31.36],
    [model_tax[9][0], "", model_tax[9][1], 10, 0, 39.34, 16.53, 25.40],
    [model_tax[9][0], "", model_tax[9][1], 100, 0, 39.94, 16.96, 26.09],
    [model_tax[19][0], "", model_tax[19][1], 10, 0, 39.50, 16.80, 25.72],
    [model_tax[19][0], "", model_tax[19][1], 100, 0, 40.53, 17.61, 26.64],
    [model_tax[18][0], "", model_tax[18][1], 200, 0, 40.81, 17.78, 36.94],
]
141
+
142
# BillSum leaderboard rows. Same row layout as the other *_DATA tables:
#   [Model, Additional info, Taxonomy, Training samples,
#    ROUGE placeholder (recomputed later), ROUGE-1, ROUGE-2, ROUGE-L]
# Fix: the first three rows used model_tax[0][0] (the model NAME) in the
# Taxonomy column; every other table uses model_tax[i][1] (the taxonomy
# label), so the PEGASUS rows showed "PEGASUS" instead of "Pre-training".
BILLSUM_DATA = [
    [model_tax[0][0], "large", model_tax[0][1], 0, 0, 41.02, 17.44, 25.24],
    [model_tax[0][0], "large", model_tax[0][1], 10, 0, 40.48, 18.49, 27.27],
    [model_tax[0][0], "large", model_tax[0][1], 100, 0, 44.78, 26.40, 34.40],
    [model_tax[1][0], "large", model_tax[1][1], 0, 0, 41.32, 18.04, 25.11],
    [model_tax[1][0], "large", model_tax[1][1], 10, 0, 42.55, 18.97, 26.92],
    [model_tax[1][0], "large", model_tax[1][1], 100, 0, 46.48, 27.77, 36.53],
    [model_tax[7][0], "LED base(512) w/Se3", model_tax[7][1], 10, 0, 46.94, 23.04, 29.29],
    [model_tax[7][0], "LED base(512) w/Se3", model_tax[7][1], 100, 0, 50.4, 27.73, 33.74],
    [model_tax[11][0], "", model_tax[11][1], 10, 0, 47.57, 24.14, 30.35],
    [model_tax[11][0], "", model_tax[11][1], 100, 0, 51.59, 29.36, 35.04],
    [model_tax[9][0], "", model_tax[9][1], 10, 0, 46.64, 25.07, 30.90],
    [model_tax[9][0], "", model_tax[9][1], 100, 0, 48.18, 27.18, 33.28],
    [model_tax[2][0], "", model_tax[2][1], 10, 0, 41.22, 18.61, 26.33],
    [model_tax[2][0], "", model_tax[2][1], 100, 0, 45.29, 22.74, 29.56],
    [model_tax[19][0], "", model_tax[19][1], 10, 0, 46.64, 25.07, 30.90],
    [model_tax[19][0], "", model_tax[19][1], 100, 0, 48.18, 27.18, 33.28],
]
160
+
161
# XSum leaderboard rows. Same row layout as the other *_DATA tables:
#   [Model, Additional info, Taxonomy, Training samples,
#    ROUGE placeholder (recomputed later), ROUGE-1, ROUGE-2, ROUGE-L]
XSUM_DATA = [
    [model_tax[0][0], "large", model_tax[0][1], 0, 0, 19.27, 3.00, 12.72],
    [model_tax[0][0], "large", model_tax[0][1], 10, 0, 19.39, 3.45, 14.02],
    [model_tax[0][0], "large", model_tax[0][1], 100, 0, 39.07, 16.44, 31.27],
    [model_tax[10][0], "", model_tax[10][1], 300, 0, 32.86, 11.27, 25.64],
    [model_tax[16][0], "", model_tax[16][1], 0, 0, 20.72, 3.62, 16.56],
    [model_tax[16][0], "", model_tax[16][1], 10, 0, 26.10, 7.20, 19.92],
    [model_tax[16][0], "", model_tax[16][1], 100, 0, 33.33, 11.36, 25.85],
    [model_tax[9][0], "", model_tax[9][1], 10, 0, 32.35, 11.86, 25.33],
    [model_tax[9][0], "", model_tax[9][1], 100, 0, 35.54, 13.94, 27.79],
    [model_tax[19][0], "", model_tax[19][1], 10, 0, 32.65, 12.10, 25.82],
    [model_tax[19][0], "", model_tax[19][1], 100, 0, 36.51, 14.55, 29.01],
    [model_tax[12][0], "", model_tax[12][1], 10, 0, 32.74, 10.90, 24.86],
    [model_tax[12][0], "", model_tax[12][1], 100, 0, 35.69, 12.88, 27.25],
    [model_tax[18][0], "", model_tax[18][1], 1000, 0, 21.15, 3.08, 15.91],
    [model_tax[4][0], "", model_tax[4][1], 100, 0, 35.20, 13.30, 28.10],
]
178
+
179
# Multi-News leaderboard rows. Same row layout as the other *_DATA tables:
#   [Model, Additional info, Taxonomy, Training samples,
#    ROUGE placeholder (recomputed later), ROUGE-1, ROUGE-2, ROUGE-L]
MN_DATA = [
    [model_tax[0][0], "large", model_tax[0][1], 0, 0, 36.54, 10.52, 18.67],
    [model_tax[0][0], "large", model_tax[0][1], 10, 0, 39.79, 12.56, 20.06],
    [model_tax[0][0], "large", model_tax[0][1], 100, 0, 41.04, 13.88, 21.52],
    [model_tax[6][0], "", model_tax[6][1], 0, 0, 39.09, 13.91, 19.19],
    [model_tax[6][0], "", model_tax[6][1], 10, 0, 44.02, 15.54, 22.03],
    [model_tax[6][0], "", model_tax[6][1], 100, 0, 46.01, 16.76, 22.91],
    [model_tax[17][0], "", model_tax[17][1], 0, 0, 43.5, 15.7, 22.4],
    [model_tax[17][0], "", model_tax[17][1], 10, 0, 43.4, 16.6, 22.2],
    [model_tax[17][0], "", model_tax[17][1], 100, 0, 45.7, 16.8, 23.2],
    [model_tax[19][0], "", model_tax[19][1], 10, 0, 43.60, 14.85, 20.70],
    [model_tax[19][0], "", model_tax[19][1], 100, 0, 45.55, 16.01, 22.12],
    [model_tax[2][0], "", model_tax[2][1], 10, 0, 38.88, 12.78, 19.88],
    [model_tax[2][0], "", model_tax[2][1], 100, 0, 39.64, 13.64, 20.45],
]
194
+
195
# Displayed column headers. COL_NAMES[0] ("Rank") is inserted by the
# post-processing loop; COL_NAMES[1:] names the raw *_DATA columns.
COL_NAMES = [
    "Rank",
    "Model",
    "Additional info",
    "Taxonomy",
    "Training samples",
    "ROUGE",
    "ROUGE-1",
    "ROUGE-2",
    "ROUGE-L",
]
206
+
207
# One leaderboard DataFrame per dataset key (keys match DATASETS).
data = {
    key: pd.DataFrame(rows)
    for key, rows in (
        ("samsum", SAMSUM_DATA),
        ("cnn", CNN_DATA),
        ("billsum", BILLSUM_DATA),
        ("xsum", XSUM_DATA),
        ("multinews", MN_DATA),
    )
}
214
+
215
def make_clickable(text, url):
    """Render *text* as an underlined markdown link pointing at *url*."""
    return f"<u>[{text}]({url})</u>"
217
+
218
# Post-process each leaderboard table: name the columns, fill in the mean
# ROUGE score, rank rows by it, and link model names to their paper/repo.
# Fix: dropped the leftover debug print of the Model column and the
# commented-out dead code at the end of the loop body.
for dataset in data:
    data[dataset].columns = COL_NAMES[1:]
    # Overwrite the 0 placeholder: ROUGE = mean of R-1/R-2/R-L, 2 decimals.
    data[dataset]["ROUGE"] = np.around(
        np.mean(data[dataset][["ROUGE-1", "ROUGE-2", "ROUGE-L"]], axis=1),
        decimals=2,
    )
    # Default sort is by the averaged ROUGE score, best first.
    data[dataset].sort_values("ROUGE", ascending=False, inplace=True)
    # Rank column mirrors the ROUGE ordering just applied.
    data[dataset].insert(0, COL_NAMES[0], range(1, 1 + len(data[dataset])))
    # Turn each model name into a markdown link to its paper/repository.
    data[dataset]["Model"] = data[dataset]["Model"].apply(
        lambda x: make_clickable(x, REPOS_PAPERS[x])
    )
228
+
229
+
230
# Counts shown in the intro markdown. Keys/entries are unique, so a plain
# len() gives the same result as len(set(...)).
NUM_DATASETS = len(DATASETS)
NUM_MODELS = len(MODELS)
232
+
233
+
234
# CSS injected into the gradio Blocks app to style the leaderboard tables:
# 1. Force headers to wrap
# 2. Force model column (maximum) width
# 3. Prevent model column from overflowing, scroll instead
css = """
table > thead {
white-space: normal
}
table {
--cell-width-1: 210px
}
table > tbody > tr > td:nth-child(2) > div {
overflow-x: auto
}
"""
248
+
249
# Build and launch the gradio app: an intro section plus one tab per
# dataset, each tab showing that dataset's leaderboard DataFrame.
# Fixes: the `datatype` list had 8 entries for 9 displayed columns (the
# inserted "Rank" column was unaccounted for, shifting every column's
# rendering type by one); the commented-out citation block was removed.
block = gr.Blocks(css=css)
with block:
    gr.Markdown(f"""
This is a leaderboard for Few-Shot Summarization (FSS).

- **Total Datasets**: {NUM_DATASETS}
- **Total Models**: {NUM_MODELS}
- **Metric**: ROUGE Score

For more information about the metrics and models employed and to gain a greater understanding of the general taxonomy of FSS, please refer to our [Survey on FSS](the paper will be published soon 🤗).
""")

    with gr.Tabs():
        for dataset in data:
            dataset_name = DATASETS[dataset]
            with gr.TabItem(dataset_name):
                with gr.Row():
                    gr.Markdown(f"""
**{dataset_name}** leaderboard
- **ROUGE** is the average of ROUGE-1, ROUGE-2 and ROUGE-L
- **RANK** is defined following ROUGE column values
""")
                with gr.Row():
                    # One datatype per displayed column (9 incl. Rank):
                    # Rank is numeric; Model / Additional info / Taxonomy
                    # render as markdown (Model holds a markdown link);
                    # the remaining score columns are numeric.
                    data_classification = gr.components.Dataframe(
                        data[dataset],
                        datatype=[
                            "number", "markdown", "markdown", "markdown",
                            "number", "number", "number", "number", "number",
                        ],
                        type="pandas",
                    )

block.queue(max_size=10)
block.launch()