David Pomerenke
committed on
Commit
·
8190782
1
Parent(s):
d5fc8b3
Add links to add CommonVoice recordings
Browse files
- app.py +3 -2
- evals.py +23 -7
- results.json +20 -10
app.py
CHANGED
|
@@ -178,6 +178,7 @@ def create_language_stats_df(results):
|
|
| 178 |
model = best_score['model']
|
| 179 |
model_name = model.split('/')[-1] if model else "N/A"
|
| 180 |
model_link = f"<a href='https://openrouter.ai/{model}' style='text-decoration: none; color: inherit;'>{model_name}</a>" if model else "N/A"
|
|
|
|
| 181 |
row = {
|
| 182 |
"Language": f"**{lang['language_name']}**",
|
| 183 |
"Speakers (M)": round(lang["speakers"] / 1_000_000, 1),
|
|
@@ -189,7 +190,7 @@ def create_language_stats_df(results):
|
|
| 189 |
"Best Model BLEU": round(best_score["bleu"], 3)
|
| 190 |
if best_score["bleu"] is not None
|
| 191 |
else "N/A",
|
| 192 |
-
"CommonVoice Hours":
|
| 193 |
}
|
| 194 |
flat_data.append(row)
|
| 195 |
|
|
@@ -198,7 +199,7 @@ def create_language_stats_df(results):
|
|
| 198 |
value=df,
|
| 199 |
label="Language Results",
|
| 200 |
show_search="search",
|
| 201 |
-
datatype=["markdown", "number", "number", "number", "markdown", "number"],
|
| 202 |
)
|
| 203 |
|
| 204 |
|
|
|
|
| 178 |
model = best_score['model']
|
| 179 |
model_name = model.split('/')[-1] if model else "N/A"
|
| 180 |
model_link = f"<a href='https://openrouter.ai/{model}' style='text-decoration: none; color: inherit;'>{model_name}</a>" if model else "N/A"
|
| 181 |
+
commonvoice_link = f"<!--{lang['commonvoice_hours']:07} (for sorting)--> <a href='https://commonvoice.mozilla.org/{lang['commonvoice_locale']}/speak' style='text-decoration: none; color: inherit;'>🎙️ {lang['commonvoice_hours']}</a>" if lang["commonvoice_hours"] else "N/A"
|
| 182 |
row = {
|
| 183 |
"Language": f"**{lang['language_name']}**",
|
| 184 |
"Speakers (M)": round(lang["speakers"] / 1_000_000, 1),
|
|
|
|
| 190 |
"Best Model BLEU": round(best_score["bleu"], 3)
|
| 191 |
if best_score["bleu"] is not None
|
| 192 |
else "N/A",
|
| 193 |
+
"CommonVoice Hours": commonvoice_link,
|
| 194 |
}
|
| 195 |
flat_data.append(row)
|
| 196 |
|
|
|
|
| 199 |
value=df,
|
| 200 |
label="Language Results",
|
| 201 |
show_search="search",
|
| 202 |
+
datatype=["markdown", "number", "number", "number", "markdown", "number", "markdown"],
|
| 203 |
)
|
| 204 |
|
| 205 |
|
evals.py
CHANGED
|
@@ -61,11 +61,15 @@ languages = pd.DataFrame(list(languages.items()), columns=["bcp_47", "speakers"]
|
|
| 61 |
languages["name"] = languages["bcp_47"].apply(lambda x: Language.get(x).display_name())
|
| 62 |
|
| 63 |
# load script codes and names
|
| 64 |
-
scripts = pd.read_csv("data/ScriptCodes.csv").rename(
|
|
|
|
|
|
|
|
|
|
| 65 |
|
| 66 |
def script_name(iso15924):
|
| 67 |
return scripts[scripts["iso15924"] == iso15924]["script_name"].values[0]
|
| 68 |
|
|
|
|
| 69 |
# load benchmark languages and scripts
|
| 70 |
benchmark_dir = "data/floresp-v2.0-rc.3/dev"
|
| 71 |
benchmark_languages = pd.DataFrame(
|
|
@@ -94,16 +98,20 @@ def get_commonvoice_stats(date: date):
|
|
| 94 |
|
| 95 |
|
| 96 |
commonvoice_stats = pd.DataFrame(get_commonvoice_stats(date.today())).rename(
|
| 97 |
-
columns={"locale": "
|
| 98 |
-
)[["
|
| 99 |
# ignore country (language is language) (in practice this is only relevant to zh-CN/zh-TW/zh-HK)
|
| 100 |
-
commonvoice_stats["bcp_47"] = commonvoice_stats["
|
| 101 |
lambda x: re.sub(r"-[A-Z]{2}$", "", x)
|
| 102 |
)
|
| 103 |
commonvoice_stats["bcp_47"] = commonvoice_stats["bcp_47"].apply(
|
| 104 |
lambda x: standardize_tag(x, macro=True)
|
| 105 |
) # this does not really seem to get macrolanguages though, e.g. not for Quechua
|
| 106 |
-
commonvoice_stats =
|
|
|
|
|
|
|
|
|
|
|
|
|
| 107 |
|
| 108 |
# merge data
|
| 109 |
languages = pd.merge(
|
|
@@ -149,6 +157,7 @@ async def complete(**kwargs):
|
|
| 149 |
raise Exception(response)
|
| 150 |
return response
|
| 151 |
|
|
|
|
| 152 |
async def translate(model, target_language, sentence):
|
| 153 |
script = script_name(target_language.iso15924)
|
| 154 |
reply = await complete(
|
|
@@ -170,7 +179,9 @@ def mean(l):
|
|
| 170 |
|
| 171 |
|
| 172 |
def load_sentences(language):
|
| 173 |
-
return open(
|
|
|
|
|
|
|
| 174 |
|
| 175 |
|
| 176 |
# evaluation!
|
|
@@ -196,7 +207,11 @@ async def main():
|
|
| 196 |
original_sentences, target_languages.itertuples()
|
| 197 |
)
|
| 198 |
]
|
| 199 |
-
predictions = await tqdm_asyncio.gather(
|
|
|
|
|
|
|
|
|
|
|
|
|
| 200 |
target_sentences = [
|
| 201 |
load_sentences(lang)[i]
|
| 202 |
for i, lang in enumerate(target_languages.itertuples())
|
|
@@ -227,6 +242,7 @@ async def main():
|
|
| 227 |
"bleu": mean([s["bleu"] for s in scores]) if scores else None,
|
| 228 |
# "bert_score": mean([s["bert_score"] for s in scores]),
|
| 229 |
"commonvoice_hours": language.commonvoice_hours,
|
|
|
|
| 230 |
}
|
| 231 |
)
|
| 232 |
with open("results.json", "w") as f:
|
|
|
|
| 61 |
languages["name"] = languages["bcp_47"].apply(lambda x: Language.get(x).display_name())
|
| 62 |
|
| 63 |
# load script codes and names
|
| 64 |
+
scripts = pd.read_csv("data/ScriptCodes.csv").rename(
|
| 65 |
+
columns={"Code": "iso15924", "English Name": "script_name"}
|
| 66 |
+
)
|
| 67 |
+
|
| 68 |
|
| 69 |
def script_name(iso15924):
|
| 70 |
return scripts[scripts["iso15924"] == iso15924]["script_name"].values[0]
|
| 71 |
|
| 72 |
+
|
| 73 |
# load benchmark languages and scripts
|
| 74 |
benchmark_dir = "data/floresp-v2.0-rc.3/dev"
|
| 75 |
benchmark_languages = pd.DataFrame(
|
|
|
|
| 98 |
|
| 99 |
|
| 100 |
commonvoice_stats = pd.DataFrame(get_commonvoice_stats(date.today())).rename(
|
| 101 |
+
columns={"locale": "commonvoice_locale", "validatedHours": "commonvoice_hours"}
|
| 102 |
+
)[["commonvoice_locale", "commonvoice_hours"]]
|
| 103 |
# ignore country (language is language) (in practice this is only relevant to zh-CN/zh-TW/zh-HK)
|
| 104 |
+
commonvoice_stats["bcp_47"] = commonvoice_stats["commonvoice_locale"].apply(
|
| 105 |
lambda x: re.sub(r"-[A-Z]{2}$", "", x)
|
| 106 |
)
|
| 107 |
commonvoice_stats["bcp_47"] = commonvoice_stats["bcp_47"].apply(
|
| 108 |
lambda x: standardize_tag(x, macro=True)
|
| 109 |
) # this does not really seem to get macrolanguages though, e.g. not for Quechua
|
| 110 |
+
commonvoice_stats = (
|
| 111 |
+
commonvoice_stats.groupby("bcp_47")
|
| 112 |
+
.agg({"commonvoice_hours": "sum", "commonvoice_locale": "first"})
|
| 113 |
+
.reset_index()
|
| 114 |
+
)
|
| 115 |
|
| 116 |
# merge data
|
| 117 |
languages = pd.merge(
|
|
|
|
| 157 |
raise Exception(response)
|
| 158 |
return response
|
| 159 |
|
| 160 |
+
|
| 161 |
async def translate(model, target_language, sentence):
|
| 162 |
script = script_name(target_language.iso15924)
|
| 163 |
reply = await complete(
|
|
|
|
| 179 |
|
| 180 |
|
| 181 |
def load_sentences(language):
|
| 182 |
+
return open(
|
| 183 |
+
f"{benchmark_dir}/dev.{language.iso639_3}_{language.iso15924}"
|
| 184 |
+
).readlines()
|
| 185 |
|
| 186 |
|
| 187 |
# evaluation!
|
|
|
|
| 207 |
original_sentences, target_languages.itertuples()
|
| 208 |
)
|
| 209 |
]
|
| 210 |
+
predictions = await tqdm_asyncio.gather(
|
| 211 |
+
*predictions,
|
| 212 |
+
miniters=1,
|
| 213 |
+
desc=f"{language.name} {model.split('/')[0]}",
|
| 214 |
+
)
|
| 215 |
target_sentences = [
|
| 216 |
load_sentences(lang)[i]
|
| 217 |
for i, lang in enumerate(target_languages.itertuples())
|
|
|
|
| 242 |
"bleu": mean([s["bleu"] for s in scores]) if scores else None,
|
| 243 |
# "bert_score": mean([s["bert_score"] for s in scores]),
|
| 244 |
"commonvoice_hours": language.commonvoice_hours,
|
| 245 |
+
"commonvoice_locale": language.commonvoice_locale,
|
| 246 |
}
|
| 247 |
)
|
| 248 |
with open("results.json", "w") as f:
|
results.json
CHANGED
|
@@ -10,7 +10,8 @@
|
|
| 10 |
}
|
| 11 |
],
|
| 12 |
"bleu": 0.4931825583688982,
|
| 13 |
-
"commonvoice_hours": 2649.0
|
|
|
|
| 14 |
},
|
| 15 |
{
|
| 16 |
"language_name": "Chinese",
|
|
@@ -43,7 +44,8 @@
|
|
| 43 |
}
|
| 44 |
],
|
| 45 |
"bleu": 0.4356399559223496,
|
| 46 |
-
"commonvoice_hours": 422.0
|
|
|
|
| 47 |
},
|
| 48 |
{
|
| 49 |
"language_name": "Hindi",
|
|
@@ -56,7 +58,8 @@
|
|
| 56 |
}
|
| 57 |
],
|
| 58 |
"bleu": 0.42910938007537924,
|
| 59 |
-
"commonvoice_hours": 16.0
|
|
|
|
| 60 |
},
|
| 61 |
{
|
| 62 |
"language_name": "Spanish",
|
|
@@ -69,7 +72,8 @@
|
|
| 69 |
}
|
| 70 |
],
|
| 71 |
"bleu": 0.3335615012680206,
|
| 72 |
-
"commonvoice_hours": 446.0
|
|
|
|
| 73 |
},
|
| 74 |
{
|
| 75 |
"language_name": "Arabic",
|
|
@@ -82,7 +86,8 @@
|
|
| 82 |
}
|
| 83 |
],
|
| 84 |
"bleu": 0.19072998559991275,
|
| 85 |
-
"commonvoice_hours": 91.0
|
|
|
|
| 86 |
},
|
| 87 |
{
|
| 88 |
"language_name": "Urdu",
|
|
@@ -115,7 +120,8 @@
|
|
| 115 |
}
|
| 116 |
],
|
| 117 |
"bleu": 0.32276445473356513,
|
| 118 |
-
"commonvoice_hours": 76.0
|
|
|
|
| 119 |
},
|
| 120 |
{
|
| 121 |
"language_name": "French",
|
|
@@ -128,7 +134,8 @@
|
|
| 128 |
}
|
| 129 |
],
|
| 130 |
"bleu": 0.40595466651226686,
|
| 131 |
-
"commonvoice_hours": 1051.0
|
|
|
|
| 132 |
},
|
| 133 |
{
|
| 134 |
"language_name": "Bangla",
|
|
@@ -141,7 +148,8 @@
|
|
| 141 |
}
|
| 142 |
],
|
| 143 |
"bleu": 0.30570858536443696,
|
| 144 |
-
"commonvoice_hours": 49.0
|
|
|
|
| 145 |
},
|
| 146 |
{
|
| 147 |
"language_name": "Portuguese",
|
|
@@ -174,7 +182,8 @@
|
|
| 174 |
}
|
| 175 |
],
|
| 176 |
"bleu": 0.3778453994295843,
|
| 177 |
-
"commonvoice_hours": 176.0
|
|
|
|
| 178 |
},
|
| 179 |
{
|
| 180 |
"language_name": "Punjabi",
|
|
@@ -187,6 +196,7 @@
|
|
| 187 |
}
|
| 188 |
],
|
| 189 |
"bleu": 0.34311946995454473,
|
| 190 |
-
"commonvoice_hours": 2.3
|
|
|
|
| 191 |
}
|
| 192 |
]
|
|
|
|
| 10 |
}
|
| 11 |
],
|
| 12 |
"bleu": 0.4931825583688982,
|
| 13 |
+
"commonvoice_hours": 2649.0,
|
| 14 |
+
"commonvoice_locale": "en"
|
| 15 |
},
|
| 16 |
{
|
| 17 |
"language_name": "Chinese",
|
|
|
|
| 44 |
}
|
| 45 |
],
|
| 46 |
"bleu": 0.4356399559223496,
|
| 47 |
+
"commonvoice_hours": 422.0,
|
| 48 |
+
"commonvoice_locale": "zh-TW"
|
| 49 |
},
|
| 50 |
{
|
| 51 |
"language_name": "Hindi",
|
|
|
|
| 58 |
}
|
| 59 |
],
|
| 60 |
"bleu": 0.42910938007537924,
|
| 61 |
+
"commonvoice_hours": 16.0,
|
| 62 |
+
"commonvoice_locale": "hi-IN"
|
| 63 |
},
|
| 64 |
{
|
| 65 |
"language_name": "Spanish",
|
|
|
|
| 72 |
}
|
| 73 |
],
|
| 74 |
"bleu": 0.3335615012680206,
|
| 75 |
+
"commonvoice_hours": 446.0,
|
| 76 |
+
"commonvoice_locale": "es"
|
| 77 |
},
|
| 78 |
{
|
| 79 |
"language_name": "Arabic",
|
|
|
|
| 86 |
}
|
| 87 |
],
|
| 88 |
"bleu": 0.19072998559991275,
|
| 89 |
+
"commonvoice_hours": 91.0,
|
| 90 |
+
"commonvoice_locale": "ar"
|
| 91 |
},
|
| 92 |
{
|
| 93 |
"language_name": "Urdu",
|
|
|
|
| 120 |
}
|
| 121 |
],
|
| 122 |
"bleu": 0.32276445473356513,
|
| 123 |
+
"commonvoice_hours": 76.0,
|
| 124 |
+
"commonvoice_locale": "ur"
|
| 125 |
},
|
| 126 |
{
|
| 127 |
"language_name": "French",
|
|
|
|
| 134 |
}
|
| 135 |
],
|
| 136 |
"bleu": 0.40595466651226686,
|
| 137 |
+
"commonvoice_hours": 1051.0,
|
| 138 |
+
"commonvoice_locale": "fr"
|
| 139 |
},
|
| 140 |
{
|
| 141 |
"language_name": "Bangla",
|
|
|
|
| 148 |
}
|
| 149 |
],
|
| 150 |
"bleu": 0.30570858536443696,
|
| 151 |
+
"commonvoice_hours": 49.0,
|
| 152 |
+
"commonvoice_locale": "bn"
|
| 153 |
},
|
| 154 |
{
|
| 155 |
"language_name": "Portuguese",
|
|
|
|
| 182 |
}
|
| 183 |
],
|
| 184 |
"bleu": 0.3778453994295843,
|
| 185 |
+
"commonvoice_hours": 176.0,
|
| 186 |
+
"commonvoice_locale": "pt"
|
| 187 |
},
|
| 188 |
{
|
| 189 |
"language_name": "Punjabi",
|
|
|
|
| 196 |
}
|
| 197 |
],
|
| 198 |
"bleu": 0.34311946995454473,
|
| 199 |
+
"commonvoice_hours": 2.3,
|
| 200 |
+
"commonvoice_locale": "pa-IN"
|
| 201 |
}
|
| 202 |
]
|