Commit · bd61ad8
1 Parent(s): aca408b
Update app.py
app.py
CHANGED
@@ -9,22 +9,14 @@ import zipfile
 REFERENCE_NAME = "references"
 SUBMISSION_NAME = "submissions"
 
-REFERENCE_URL = os.path.join(
-    "https://huggingface.co/datasets/xtreme-s", REFERENCE_NAME
-)
-SUBMISSION_URL = os.path.join(
-    "https://huggingface.co/datasets/xtreme-s", SUBMISSION_NAME
-)
+REFERENCE_URL = os.path.join("https://huggingface.co/datasets/xtreme-s", REFERENCE_NAME)
+SUBMISSION_URL = os.path.join("https://huggingface.co/datasets/xtreme-s", SUBMISSION_NAME)
 
 # grab these repos using the token provided
 HF_TOKEN = os.environ.get("HF_TOKEN")
 
-reference_repo = Repository(
-    local_dir="references", clone_from=REFERENCE_URL, use_auth_token=HF_TOKEN
-)
-submission_repo = Repository(
-    local_dir="submissions", clone_from=SUBMISSION_URL, use_auth_token=HF_TOKEN
-)
+reference_repo = Repository(local_dir="references", clone_from=REFERENCE_URL, use_auth_token=HF_TOKEN)
+submission_repo = Repository(local_dir="submissions", clone_from=SUBMISSION_URL, use_auth_token=HF_TOKEN)
 submission_repo.git_pull()
 
 all_submissions = [
@@ -46,7 +38,7 @@ TEST_SETS = [
 EXPECTED_TEST_FILES = [f + ".txt" for f in TEST_SETS]
 
 # define the optional test sets - ignore for now
-OPTIONAL_TEST_SETS = []
+OPTIONAL_TEST_SETS = []  # ["f-r5"]
 OPTIONAL_TEST_FILES = [f + ".txt" for f in OPTIONAL_TEST_SETS]
 
 # load all metrics
@@ -68,9 +60,7 @@ METRIC_MAP = {
 
 def compute_score(pred_file, ref_file, metric):
     """Assess predicted file against reference file for a given metric."""
-    with open(pred_file, "r", encoding="utf-8") as pred, open(
-        ref_file, "r", encoding="utf-8"
-    ) as ref:
+    with open(pred_file, "r", encoding="utf-8") as pred, open(ref_file, "r", encoding="utf-8") as ref:
         # TODO: any post-processing required?
         pred_lines = [line.strip() for line in pred.readlines()]
         ref_lines = [line.strip() for line in ref.readlines()]
@@ -78,6 +68,7 @@ def compute_score(pred_file, ref_file, metric):
     score = metric(ref_lines, pred_lines)
     return score
 
+
 # load up the results file
 CSV_RESULTS_FILE = os.path.join(SUBMISSION_NAME, "results.csv")
 
@@ -103,18 +94,21 @@ table.index = table.index + 1
 st.markdown("# XTREME-S: Evaluating Cross-lingual Speech Representations")
 
 st.markdown(
-    ""
-
-
-
+    "This is the leaderboard for the XTREME-S benchmark. Submitted systems are ranked by the **average score**, which"
+    " is a weighted average of the mandatory test sets:"
+)
+# hacky way of getting math-mode to render
+st.write(
+    r"""
 $$
 \begin{gathered}
 0.4 *\left(100-\frac{\text{Fleurs}+\text{MLS}+\text{VP}}{3}\right)_{(\mathrm{WER})}+ \\
 0.4 * \text{CoVoST}-2_{(\mathrm{BLEU})}+0.2 *\left(\frac{\text{F-LID}+\text{M-14}}{2}\right)_{(\mathrm{Acc})}
 \end{gathered}
 $$
-
+    """
 )
+st.markdown("The optional dataset of f-r5 does not contribute to the average score.")
 
 # st.table(table)
 st.dataframe(table.style.format(subset=["average-score", *TEST_SETS, *OPTIONAL_TEST_SETS], formatter="{:.1f}"))
@@ -154,7 +148,7 @@ if submit_button:
 
     submission = uploaded_file.name.split(".zip")[0]
    with st.spinner(f"Uploading {submission}..."):
-        with zipfile.ZipFile(uploaded_file,
+        with zipfile.ZipFile(uploaded_file, "r") as zip_ref:
            zip_ref.extractall(submission_repo.local_dir)
        submission_repo.push_to_hub()
 
@@ -179,12 +173,15 @@ if submit_button:
 
        score = compute_score(pred_file, ref_file, metric)
        results[test_set] = round(100 * score, 2)
-
+
    # TODO: assessment of 'optional' test sets
 
    # XTREME-S score is computed over the mandatory test sets only
-    average_score =
-
+    average_score = (
+        0.4 * (100 - (results["fleurs"] + results["mls"] + results["vp"]) / 3)
+        + 0.4 * results["covost-2"]
+        + 0.2 * (results["f-lid"] + results["m-14"]) / 2
+    )
    results["average-score"] = round(average_score, 2)
 
    all_results = all_results.append(results, ignore_index=True)
@@ -193,4 +190,4 @@ if submit_button:
    all_results.to_csv(CSV_RESULTS_FILE, index=False)
    commit_url = submission_repo.push_to_hub()
 
-    st.success(
+    st.success("Please refresh this space (CTRL+R) to see your result")