File size: 3,094 Bytes
cd38731
 
efe64cb
cd38731
 
 
 
 
 
 
 
 
 
 
efe64cb
 
 
 
 
 
cd38731
 
 
 
 
 
 
 
 
 
 
 
 
efe64cb
cd38731
 
efe64cb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cd38731
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
efe64cb
cd38731
 
efe64cb
 
cd38731
 
efe64cb
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
import streamlit as st
from datasets import load_dataset
import numpy as np

# Page-level Streamlit configuration: sets the page title.
st.set_page_config(page_title="High-Level dataset")

# Annotation fields offered in the sidebar multiselect and rendered by write_obj.
FIELDS = ["scene", "action", "rationale", "object"]
# Question text displayed under a field's heading when questions are enabled.
# "object" has no entry; write_obj skips the lookup for that field.
QS = {
    "scene": "Where is the picture taken?",
    "action": "What is the subject doing?",
    "rationale": "Why is the subject doing it?"
}
# Dataset splits offered in the sidebar "Split" selector.
SPLITS = ["test", "train"]

# Baseline subtracted from an item's purity score to compute the metric delta.
# NOTE(review): presumably a precomputed dataset-wide average - confirm source.
AVG_PURITY = 1.10

# Baseline subtracted from an item's normalized diversity score to compute the
# metric delta.
# NOTE(review): presumably a precomputed dataset-wide average - confirm source.
AVG_DIVERSITY = 0.872819
# Bounds used by write_obj for the min-max normalization of the raw diversity
# score (the normalized value is then inverted as 1 - x).
MIN_DIVERSITY = 0
MAX_DIVERSITY = 100

# NOTE(review): ``st.cache`` is deprecated in recent Streamlit releases in
# favour of ``st.cache_data`` / ``st.cache_resource``; it is kept here because
# the Streamlit version this app targets is not visible from this file.
@st.cache
def load_data(split):
    """Load the HL dataset and index one of its splits by COCO image id.

    Args:
        split: Name of the split to index (the app passes an entry of
            ``SPLITS``).

    Returns:
        A ``(dataset, coco2id)`` tuple. ``dataset`` is the object returned by
        ``load_dataset("michelecafagna26/hl")`` and still contains every
        split. ``coco2id`` maps the integer parsed from each ``file_name`` of
        ``split`` to that row's position inside the split.

    Raises:
        ValueError: If a ``file_name`` is not a plain integer once the
            ``COCO_train2014_`` prefix and the ``.jpg`` suffix are removed.
    """
    dataset = load_dataset("michelecafagna26/hl")

    # Read only the ``file_name`` column. Indexing the split row by row would
    # materialize every column of every row (including the ``image`` one)
    # just to extract a single string.
    file_names = dataset[split]["file_name"]

    # On duplicate ids the last row wins, as with any dict comprehension.
    coco2id = {
        int(name.replace("COCO_train2014_", "").replace(".jpg", "")): row_idx
        for row_idx, name in enumerate(file_names)
    }

    return dataset, coco2id


def write_obj(dataset, img_id, options, split, list_type="num", show_questions=False,
              show_conf=False):
    """Render one dataset item: its image, summary metrics and annotations.

    Args:
        dataset: Object indexable by split name and then by row position,
            yielding a row with ``image``, ``purity``, ``diversity``,
            ``confidence`` and one entry per annotation field.
        img_id: Row position of the item inside ``dataset[split]``.
        options: Iterable of annotation field names to display, in order.
        split: Name of the split the item belongs to.
        list_type: ``"num"`` numbers the annotations starting from 1; any
            other value is used verbatim as the prefix of every annotation.
        show_questions: When true, show the ``QS`` question under each field
            heading, except for ``"object"``.
        show_conf: When true, show a confidence metric next to each
            annotation, except for the ``"object"`` field.

    Returns:
        None. The function only writes Streamlit elements to the page.
    """
    # Fetch the row once: every ``dataset[split][img_id]`` lookup rebuilds the
    # whole row, so repeating it per field/annotation is wasted work.
    item = dataset[split][img_id]

    st.image(item['image'])

    # Purity: mean of the per-field means. Diversity: mean of per-field values.
    purity = item['purity']
    item_purity = np.mean([np.mean(purity[k]) for k in purity])
    item_diversity = np.mean(list(item['diversity'].values()))

    # normalize with the module-level bounds, then invert (1 - x)
    item_diversity = 1-(item_diversity-MIN_DIVERSITY)/(MAX_DIVERSITY-MIN_DIVERSITY)

    col1, col2 = st.columns(2)

    col1.metric(label="Diversity score",
                value=round(item_diversity, 2),
                delta=round(item_diversity - AVG_DIVERSITY, 2),
                help="Item's internal lexical diversity.\n Positive delta means higher then the average")

    col2.metric(label="Purity score",
                value=round(item_purity, 2),
                delta=round(item_purity - AVG_PURITY, 2),
                help="Item's internal semantic similarity.\n Positive delta means higher then the average")

    for field in options:

        st.markdown(f"## {field.capitalize()}")

        # "object" has neither a question in QS nor confidence scores.
        has_extras = field != "object"

        if show_questions and has_extras:
            st.markdown(f" Question: _{QS[field]}_")

        # Look the confidences up only when they will actually be displayed.
        confidences = item['confidence'][field] if show_conf and has_extras else None

        for n, annotation in enumerate(item[field]):

            text_col, conf_col = st.columns(2)

            if list_type == "num":
                text_col.markdown(f"{n + 1}. {annotation}")
            else:
                text_col.markdown(f"{list_type} {annotation}")

            if confidences is not None:
                conf_col.metric(label="confidence score",
                                value=confidences[n])


def main():
    """Build the page: title, sidebar controls, image picker and item view."""
    st.title('High-Level Dataset')

    # Sidebar controls; widgets are created in the order they should appear.
    sidebar = st.sidebar
    want_questions = sidebar.checkbox('Questions')
    want_confidence = sidebar.checkbox('Confidence scores')
    selected_fields = sidebar.multiselect('Choose the annotations', FIELDS, default=FIELDS)
    active_split = sidebar.selectbox('Split', SPLITS)

    dataset, coco2id = load_data(active_split)

    # Image picker, rendered in the main page area and keyed by COCO image id.
    picked_coco_id = st.selectbox(
        'Select an image',
        list(coco2id),
        help="write a key like: 7603",
    )

    # Translate the COCO id back to the row position expected by write_obj.
    write_obj(
        dataset,
        coco2id[picked_coco_id],
        options=selected_fields,
        split=active_split,
        list_type="num",
        show_questions=want_questions,
        show_conf=want_confidence,
    )


# Build the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()