Revise CyberGym
Browse files
- app.py +3 -3
- gen_table.py +2 -2
app.py
CHANGED
|
@@ -110,7 +110,7 @@ head_style) as demo:
|
|
| 110 |
)
|
| 111 |
s.headers = s.check_box['essential'] + s.checkbox_group.value
|
| 112 |
|
| 113 |
-
if benchmark
|
| 114 |
with gr.Row():
|
| 115 |
s.model_name = gr.Textbox(
|
| 116 |
value='Input the Model Name (fuzzy, case insensitive)',
|
|
@@ -137,7 +137,7 @@ head_style) as demo:
|
|
| 137 |
s = structs[benchmark_list.index(dataset_name)]
|
| 138 |
headers = s.check_box['essential'] + fields
|
| 139 |
df = cp.deepcopy(s.table)
|
| 140 |
-
if dataset_name
|
| 141 |
default_val = 'Input the Model Name (fuzzy, case insensitive)'
|
| 142 |
else:
|
| 143 |
default_val = 'Input the Agent Name (fuzzy, case insensitive)'
|
|
@@ -145,7 +145,7 @@ head_style) as demo:
|
|
| 145 |
if model_name != default_val:
|
| 146 |
print(model_name)
|
| 147 |
model_name = model_name.lower()
|
| 148 |
-
if dataset_name
|
| 149 |
method_names = [x.split('</a>')[0].split('>')[-1].lower() for x in df['Model']]
|
| 150 |
else:
|
| 151 |
method_names = [x.split('</a>')[0].split('>')[-1].lower() for x in df['Agent']]
|
|
|
|
| 110 |
)
|
| 111 |
s.headers = s.check_box['essential'] + s.checkbox_group.value
|
| 112 |
|
| 113 |
+
if benchmark not in ["SWE-bench-verified", "CyberGym"]:
|
| 114 |
with gr.Row():
|
| 115 |
s.model_name = gr.Textbox(
|
| 116 |
value='Input the Model Name (fuzzy, case insensitive)',
|
|
|
|
| 137 |
s = structs[benchmark_list.index(dataset_name)]
|
| 138 |
headers = s.check_box['essential'] + fields
|
| 139 |
df = cp.deepcopy(s.table)
|
| 140 |
+
if dataset_name not in ["SWE-bench-verified", "CyberGym"]:
|
| 141 |
default_val = 'Input the Model Name (fuzzy, case insensitive)'
|
| 142 |
else:
|
| 143 |
default_val = 'Input the Agent Name (fuzzy, case insensitive)'
|
|
|
|
| 145 |
if model_name != default_val:
|
| 146 |
print(model_name)
|
| 147 |
model_name = model_name.lower()
|
| 148 |
+
if dataset_name not in ["SWE-bench-verified", "CyberGym"]:
|
| 149 |
method_names = [x.split('</a>')[0].split('>')[-1].lower() for x in df['Model']]
|
| 150 |
else:
|
| 151 |
method_names = [x.split('</a>')[0].split('>')[-1].lower() for x in df['Agent']]
|
gen_table.py
CHANGED
|
@@ -54,7 +54,7 @@ def BUILD_L2_DF(results, benchmark):
|
|
| 54 |
model_list=list(set(model_list))
|
| 55 |
|
| 56 |
res = defaultdict(list)
|
| 57 |
-
if benchmark not in ["RedCode","NYU CTF Bench","PrimeVul","SWE-bench-verified"]:
|
| 58 |
res['Model']=model_list
|
| 59 |
elif benchmark=="SWE-bench-verified" or benchmark=="CyberGym":
|
| 60 |
res['Agent']=model_list
|
|
@@ -104,7 +104,7 @@ def BUILD_L2_DF(results, benchmark):
|
|
| 104 |
required_fields = all_fields
|
| 105 |
|
| 106 |
check_box = {}
|
| 107 |
-
if benchmark
|
| 108 |
check_box['essential'] = ['Agent']
|
| 109 |
elif benchmark=='PrimeVul':
|
| 110 |
check_box['essential'] = ['Model','Method']
|
|
|
|
| 54 |
model_list=list(set(model_list))
|
| 55 |
|
| 56 |
res = defaultdict(list)
|
| 57 |
+
if benchmark not in ["RedCode","NYU CTF Bench","PrimeVul","SWE-bench-verified","CyberGym"]:
|
| 58 |
res['Model']=model_list
|
| 59 |
elif benchmark=="SWE-bench-verified" or benchmark=="CyberGym":
|
| 60 |
res['Agent']=model_list
|
|
|
|
| 104 |
required_fields = all_fields
|
| 105 |
|
| 106 |
check_box = {}
|
| 107 |
+
if benchmark in ["SWE-bench-verified", "CyberGym"]:
|
| 108 |
check_box['essential'] = ['Agent']
|
| 109 |
elif benchmark=='PrimeVul':
|
| 110 |
check_box['essential'] = ['Model','Method']
|