LanguageBind committed on
Commit
6ca2788
·
1 Parent(s): 84f5285

Update src/compute.py

Browse files
Files changed (1) hide show
  1. src/compute.py +125 -4
src/compute.py CHANGED
@@ -1,3 +1,123 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import json
2
  import os
3
  import glob
@@ -23,7 +143,8 @@ def chatgpt_json(merge_file):
23
  if sub_item['output_chatgpt_choice'] == correct_answer_data[dataset_name][id]['answer']:
24
  correct += 1
25
 
26
- dataset_scores_dict[dataset_name] = round(correct / total_nums * 100, 2)
 
27
  return dataset_scores_dict
28
 
29
 
@@ -63,21 +184,21 @@ def compute_scores(merge_file):
63
  exclusive_understanding_score = 0
64
  # import ipdb; ipdb.set_trace()
65
  for dataset_name, weight in exclusive_understanding_weight.items():
66
- exclusive_understanding_score += weight * dataset_score_dict[dataset_name] / weights_sum
67
 
68
  # Prior Knowledge-based Question-answer
69
  prior_QA_weight = dataset_weight[2]
70
  weights_sum = sum(prior_QA_weight.values())
71
  prior_QA_score = 0
72
  for dataset_name, weight in prior_QA_weight.items():
73
- prior_QA_score += weight * dataset_score_dict[dataset_name] / weights_sum
74
 
75
  # Comprehension and Decision-making
76
  com_and_dec_QA_weight = dataset_weight[3]
77
  weights_sum = sum(com_and_dec_QA_weight.values())
78
  com_and_dec_QA_score = 0
79
  for dataset_name, weight in com_and_dec_QA_weight.items():
80
- com_and_dec_QA_score += weight * dataset_score_dict[dataset_name] / weights_sum
81
 
82
  dataset_score_dict['Exclusive_understanding'] = exclusive_understanding_score
83
  dataset_score_dict['Prior_Knowledge'] = prior_QA_score
 
1
+ # import json
2
+ # import os
3
+ # import glob
4
+ # import argparse
5
+ # import csv
6
+ #
7
+ #
8
+ # def chatgpt_json(merge_file):
9
+ # # chat results
10
+ # merge_data = merge_file.decode("utf-8")
11
+ # merge_data = eval(merge_data)
12
+ # correct_answer_file = 'file/ANSWER.json'
13
+ # with open(correct_answer_file, 'r', encoding='utf-8') as f:
14
+ # correct_answer_data = json.load(f)
15
+ #
16
+ # dataset_scores_dict = {}
17
+ # for dataset_name, item in merge_data.items():
18
+ #
19
+ # total_nums = len(item)
20
+ # correct = 0
21
+ # # assert len(item) >= len(correct_answer_data[dataset_name]), f'Video-Bench-Input.json---{dataset_name}---is incomplete!'
22
+ # for id, sub_item in item.items():
23
+ # if sub_item['output_chatgpt_choice'] == correct_answer_data[dataset_name][id]['answer']:
24
+ # correct += 1
25
+ #
26
+ # dataset_scores_dict[dataset_name] = round(correct / total_nums * 100, 2)
27
+ # return dataset_scores_dict
28
+ #
29
+ #
30
+ # def compute_scores(merge_file):
31
+ # dataset_score_dict = chatgpt_json(merge_file)
32
+ # dataset_weight = {
33
+ # 1:
34
+ # {
35
+ # "ActivityNet": 1,
36
+ # "MSVD": 1,
37
+ # "MSRVTT": 1,
38
+ # "TGIF": 1,
39
+ # "Youcook2": 1,
40
+ # "Ucfcrime": 1,
41
+ # "MOT": 0.5,
42
+ # },
43
+ #
44
+ # 2:
45
+ # {
46
+ # "TVQA": 1,
47
+ # "MV": 1,
48
+ # "NBA": 1,
49
+ # },
50
+ #
51
+ # 3:
52
+ # {
53
+ # "Driving-exam": 0.5,
54
+ # "Driving-decision-making": 1,
55
+ # "SQA3D": 1,
56
+ # }
57
+ #
58
+ # }
59
+ #
60
+ # # Video-exclusive Understanding score
61
+ # exclusive_understanding_weight = dataset_weight[1]
62
+ # weights_sum = sum(exclusive_understanding_weight.values())
63
+ # exclusive_understanding_score = 0
64
+ # # import ipdb; ipdb.set_trace()
65
+ # for dataset_name, weight in exclusive_understanding_weight.items():
66
+ # exclusive_understanding_score += weight * dataset_score_dict[dataset_name] / weights_sum
67
+ #
68
+ # # Prior Knowledge-based Question-answer
69
+ # prior_QA_weight = dataset_weight[2]
70
+ # weights_sum = sum(prior_QA_weight.values())
71
+ # prior_QA_score = 0
72
+ # for dataset_name, weight in prior_QA_weight.items():
73
+ # prior_QA_score += weight * dataset_score_dict[dataset_name] / weights_sum
74
+ #
75
+ # # Comprehension and Decision-making
76
+ # com_and_dec_QA_weight = dataset_weight[3]
77
+ # weights_sum = sum(com_and_dec_QA_weight.values())
78
+ # com_and_dec_QA_score = 0
79
+ # for dataset_name, weight in com_and_dec_QA_weight.items():
80
+ # com_and_dec_QA_score += weight * dataset_score_dict[dataset_name] / weights_sum
81
+ #
82
+ # dataset_score_dict['Exclusive_understanding'] = exclusive_understanding_score
83
+ # dataset_score_dict['Prior_Knowledge'] = prior_QA_score
84
+ # dataset_score_dict['Comprehension_and_Decision-making'] = com_and_dec_QA_score
85
+ #
86
+ # # final score
87
+ # final_score = sum([exclusive_understanding_score, prior_QA_score, com_and_dec_QA_score]) / 3
88
+ # dataset_score_dict['final_score'] = final_score
89
+ #
90
+ # # print(dataset_score_dict)
91
+ # # with open(args.score_output_file, 'w', encoding='utf-8') as f:
92
+ # # json.dump(dataset_score_dict, f, indent=2)
93
+ # # print(f'{args.score_output_file} is saved!')
94
+ # # ========================
95
+ # data = [
96
+ #
97
+ # ["Avg. All", "Avg. Video-Exclusive", "Avg. Prior-Knowledge QA", "Avg. Decision-Making",
98
+ # "ActivityNet", "MSVD", "MSRVTT", "TGIF", "Youcook2", "Ucfcrime",
99
+ # "MOT", "TVQA", "MV", "NBA", "Driving-exam", "Driving-decision-making", "SQA3D"],
100
+ #
101
+ # [final_score, exclusive_understanding_score, prior_QA_score, com_and_dec_QA_score,
102
+ # dataset_score_dict['ActivityNet'],
103
+ # dataset_score_dict["MSVD"],
104
+ # dataset_score_dict['MSRVTT'],
105
+ # dataset_score_dict['TGIF'],
106
+ # dataset_score_dict['Youcook2'],
107
+ # dataset_score_dict['Ucfcrime'],
108
+ # dataset_score_dict['MOT'],
109
+ # dataset_score_dict['TVQA'],
110
+ # dataset_score_dict['MV'],
111
+ # dataset_score_dict['NBA'],
112
+ # dataset_score_dict['Driving-exam'],
113
+ # dataset_score_dict['Driving-decision-making'],
114
+ # dataset_score_dict['SQA3D'],
115
+ # ],
116
+ # ]
117
+ #
118
+ # return data
119
+ #
120
+
121
  import json
122
  import os
123
  import glob
 
143
  if sub_item['output_chatgpt_choice'] == correct_answer_data[dataset_name][id]['answer']:
144
  correct += 1
145
 
146
+ # dataset_scores_dict[dataset_name] = round(correct / total_nums * 100, 2)
147
+ dataset_scores_dict[dataset_name] = round(correct / total_nums , 4)
148
  return dataset_scores_dict
149
 
150
 
 
184
  exclusive_understanding_score = 0
185
  # import ipdb; ipdb.set_trace()
186
  for dataset_name, weight in exclusive_understanding_weight.items():
187
+ exclusive_understanding_score += weight * dataset_score_dict[dataset_name] / weights_sum * 100
188
 
189
  # Prior Knowledge-based Question-answer
190
  prior_QA_weight = dataset_weight[2]
191
  weights_sum = sum(prior_QA_weight.values())
192
  prior_QA_score = 0
193
  for dataset_name, weight in prior_QA_weight.items():
194
+ prior_QA_score += weight * dataset_score_dict[dataset_name] / weights_sum *100
195
 
196
  # Comprehension and Decision-making
197
  com_and_dec_QA_weight = dataset_weight[3]
198
  weights_sum = sum(com_and_dec_QA_weight.values())
199
  com_and_dec_QA_score = 0
200
  for dataset_name, weight in com_and_dec_QA_weight.items():
201
+ com_and_dec_QA_score += weight * dataset_score_dict[dataset_name] / weights_sum *100
202
 
203
  dataset_score_dict['Exclusive_understanding'] = exclusive_understanding_score
204
  dataset_score_dict['Prior_Knowledge'] = prior_QA_score