File size: 4,944 Bytes
5d57406
 
 
 
 
 
 
 
 
 
6de388e
 
5d57406
 
 
6de388e
5d57406
6de388e
 
 
 
 
 
5d57406
6de388e
 
5d57406
 
 
 
6de388e
 
 
 
a77e097
6de388e
 
 
 
 
a3012a1
6de388e
 
 
 
 
 
 
 
a3012a1
6de388e
 
a3012a1
6de388e
 
 
 
 
a3012a1
 
6de388e
5d57406
 
 
 
 
 
 
 
a3012a1
6de388e
 
 
 
 
 
a3012a1
 
6de388e
 
5aa60a6
6de388e
 
5d57406
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
import json
import os
import glob
import argparse
import csv


def chatgpt_json(merge_file):
    """Compute the per-dataset accuracy (in percent) from merged chat results.

    Args:
        merge_file: UTF-8 encoded bytes (an already decoded ``str`` is accepted
            too) describing a mapping of the form
            ``{dataset_name: {sample_id: {dim: [result, ...]}}}``, where every
            ``result`` carries a numeric ``"rating"`` entry.

    Returns:
        A dict mapping each dataset name to
        ``round(sum(ratings) / number_of_results * 100, 2)``. A dataset that
        contains no result at all is scored ``0.0``.

    Raises:
        ValueError, SyntaxError: if the payload is neither valid JSON nor a
            plain Python literal.
    """
    # Local import: only the legacy fallback below needs it.
    import ast

    if isinstance(merge_file, (bytes, bytearray)):
        merge_text = merge_file.decode("utf-8")
    else:
        merge_text = merge_file

    try:
        # A real JSON parser understands true/false/null wherever they occur.
        # The previous approach patched the text and ran eval() on it, which
        # broke on e.g. `"flag": true}` and executed arbitrary expressions
        # contained in the payload.
        merge_data = json.loads(merge_text)
    except json.JSONDecodeError:
        # Legacy payloads written as Python literals (e.g. single-quoted
        # strings): keep the historical keyword patching, but evaluate with
        # literal_eval so that no code can be executed.
        legacy_text = merge_text
        for token in ("true", "false", "null"):
            legacy_text = legacy_text.replace(f": {token},", f': "{token}",')
        merge_data = ast.literal_eval(legacy_text)

    dataset_scores_dict = {}
    for dataset_name, dataset_results in merge_data.items():
        correct, total_nums = 0, 0
        for sample_results in dataset_results.values():
            for dim_results in sample_results.values():
                for result in dim_results:
                    correct += result['rating']
                    total_nums += 1

        # Guard against datasets without any result (would divide by zero).
        if total_nums:
            dataset_scores_dict[dataset_name] = round(correct / total_nums * 100, 2)
        else:
            dataset_scores_dict[dataset_name] = 0.0
    return dataset_scores_dict


def compute_scores(merge_file):
    """Build the two-row score table (header row, value row) from merged results.

    Args:
        merge_file: UTF-8 encoded bytes (an already decoded ``str`` is accepted
            too) describing a mapping of the form
            ``{task_name: {sample_id: {dim: [result, ...]}}}``. Every ``result``
            carries a numeric ``"rating"`` entry and every ``dim`` must be one
            of ``action``, ``speed``, ``direction``, ``order`` or
            ``attribute_change``.

    Returns:
        ``[header, values]``: two lists of 30 entries each. ``values`` holds
        percentages rounded to two decimals: the overall average, the average
        per dimension, the average per task, and every dimension/task
        combination. A bucket without any result is reported as ``0.0``.

    Raises:
        KeyError: if a result uses an unknown dimension, or if one of the tasks
            ``multi-choice``, ``yes_no``, ``caption_matching`` or
            ``captioning`` is missing from the payload.
        ValueError, SyntaxError: if the payload is neither valid JSON nor a
            plain Python literal.
    """
    # Local import: only the legacy fallback below needs it.
    import ast

    def _percentage(correct, total):
        # Empty buckets score 0.0 instead of raising ZeroDivisionError.
        return round(correct / total * 100, 2) if total else 0.0

    if isinstance(merge_file, (bytes, bytearray)):
        merge_text = merge_file.decode("utf-8")
    else:
        merge_text = merge_file

    try:
        # A real JSON parser understands true/false/null wherever they occur.
        # The previous approach patched the text and ran eval() on it, which
        # broke on e.g. `"flag": true}` and executed arbitrary expressions
        # contained in the payload.
        merge_data = json.loads(merge_text)
    except json.JSONDecodeError:
        # Legacy payloads written as Python literals: keep the historical
        # keyword patching, but evaluate with literal_eval so that no code can
        # be executed.
        legacy_text = merge_text
        for token in ("true", "false", "null"):
            legacy_text = legacy_text.replace(f": {token},", f': "{token}",')
        merge_data = ast.literal_eval(legacy_text)

    dims = ['action', 'speed', 'direction', 'order', 'attribute_change']
    eval_dims = dims + ['avg']

    dataset_scores_dict = {}
    total_correct, total_num = 0, 0
    dim_correct = {dim: 0 for dim in dims}
    dim_total = {dim: 0 for dim in dims}

    for dataset_name, dataset_results in merge_data.items():
        dataset_correct = {dim: 0 for dim in eval_dims}
        dataset_num = {dim: 0 for dim in eval_dims}
        for sample_results in dataset_results.values():
            for dim, dim_results in sample_results.items():
                for result in dim_results:
                    rating = result['rating']
                    dataset_correct['avg'] += rating
                    dataset_correct[dim] += rating
                    dim_correct[dim] += rating
                    dataset_num['avg'] += 1
                    dataset_num[dim] += 1
                    dim_total[dim] += 1

        total_correct += dataset_correct['avg']
        total_num += dataset_num['avg']
        for dim in eval_dims:
            dataset_scores_dict[f"{dim}_{dataset_name}"] = _percentage(
                dataset_correct[dim], dataset_num[dim]
            )

    for dim in dims:
        dataset_scores_dict[f"avg_{dim}"] = _percentage(dim_correct[dim], dim_total[dim])
    dataset_scores_dict["avg_all"] = _percentage(total_correct, total_num)

    # (score-key fragment, column label) pairs, listed in display order. The
    # header and the value row are generated from the same tables so they can
    # never drift apart.
    dim_columns = [
        ("action", "Action"),
        ("direction", "Direction"),
        ("speed", "Speed"),
        ("order", "Event Order"),
        ("attribute_change", "Attribute Change"),
    ]
    task_columns = [
        ("multi-choice", "Multi-Choice"),
        ("yes_no", "Yes/No"),
        ("caption_matching", "Caption Matching"),
        ("captioning", "Caption Generation"),
    ]

    header = ["Avg. All"]
    header += [f"Avg. {label}" for _, label in dim_columns]
    header += [f"Avg. {label}" for _, label in task_columns]
    header += [
        f"{dim_label}. {task_label}"
        for _, dim_label in dim_columns
        for _, task_label in task_columns
    ]

    values = [dataset_scores_dict["avg_all"]]
    values += [dataset_scores_dict[f"avg_{dim}"] for dim, _ in dim_columns]
    values += [dataset_scores_dict[f"avg_{task}"] for task, _ in task_columns]
    values += [
        dataset_scores_dict[f"{dim}_{task}"]
        for dim, _ in dim_columns
        for task, _ in task_columns
    ]

    data = [header, values]
    return data