Spaces:
Running
Running
Jae-Won Chung
committed on
Commit
·
f5248c1
1
Parent(s):
f98b171
Better names for scripts
Browse files
scripts/{print_results.py → count_benchmark_items.py}
RENAMED
|
File without changes
|
scripts/read_score.py
DELETED
|
@@ -1,22 +0,0 @@
|
|
| 1 |
-
import pandas as pd
|
| 2 |
-
import os
|
| 3 |
-
import csv
|
| 4 |
-
|
| 5 |
-
folder = "nlp"
|
| 6 |
-
folders = os.listdir(folder)
|
| 7 |
-
out_csv = csv.writer(open("score.csv", "w", newline=""))
|
| 8 |
-
for model in folders:
|
| 9 |
-
tasks = os.listdir(folder+"/"+str(model))
|
| 10 |
-
scores = []
|
| 11 |
-
for task in tasks:
|
| 12 |
-
df = pd.read_json(folder+"/"+str(model)+"/"+str(task))
|
| 13 |
-
model_args = df['config']['model_args']
|
| 14 |
-
results=df['results']
|
| 15 |
-
keys = results.keys()
|
| 16 |
-
if str(keys[0]) == "truthfulqa_mc":
|
| 17 |
-
score = results=df['results'][keys[0]]['mc2']
|
| 18 |
-
else:
|
| 19 |
-
score = results=df['results'][keys[0]]['acc_norm']
|
| 20 |
-
num_fewshot = df['config']['num_fewshot']
|
| 21 |
-
scores.append(score)
|
| 22 |
-
out_csv.writerow([model] + scores)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|