File size: 4,744 Bytes
c6ad631
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
import json
import os
import gradio as gr

# Parsed contents of each difficulty's JSON file, keyed by difficulty name.
datasets = {}
# Maps every difficulty offered in the UI to the JSON file holding its items.
difficulty_files = {
    "Easy": "data/scalar_easy.json",
    "Medium": "data/scalar_medium.json",
    "Hard": "data/scalar_hard.json"
}

# Load all json files
for difficulty, filepath in difficulty_files.items():
    # Decode explicitly as UTF-8 instead of relying on the platform's locale
    # default, so non-ASCII text in the data loads identically everywhere.
    with open(filepath, encoding="utf-8") as f:
        datasets[difficulty] = json.load(f)

# Current state variables (module-level, mutated by the event handlers below)
current_index = 0
current_difficulty = "Easy"  # default difficulty
current_data = datasets[current_difficulty]

# Update display function with difficulty parameter
def update_display(index, difficulty=None):
    """Return the display values for one item of a difficulty's dataset.

    Args:
        index: Position of the item to show. It is converted to ``int`` and
            clamped to the valid range of the selected dataset.
        difficulty: Key into ``datasets``. When ``None``, the currently
            selected difficulty and dataset are kept.

    Returns:
        A 5-tuple ``(question, citation, reference_id, label_id, index)``:
        the question with ``<``/``>`` escaped and every ``[MASKED_CITATION]``
        wrapped in a styled ``<span>``, the item's citation dumped as indented
        JSON, the item's ``reference_id`` and ``label_id``, and the index that
        was actually used.

    Raises:
        KeyError: If ``difficulty`` is not a key of ``datasets``.
        IndexError: If the selected dataset is empty.
    """
    global current_index, current_difficulty, current_data

    # Always resolve the dataset from the explicit argument. Comparing against
    # the module-level ``current_difficulty`` is unreliable: prev_item/next_item
    # rebind ``current_data`` without updating it, so a stale value used to
    # discard a perfectly valid ``index`` and force the view back to item 0.
    if difficulty is not None:
        selected = datasets[difficulty]  # look up first so a bad key leaves state intact
        current_difficulty = difficulty
        current_data = selected

    # Keep the index inside the dataset even if the caller's value was chosen
    # against a dataset of a different length, or arrives as a float.
    last_index = len(current_data) - 1
    index = max(0, min(int(index), last_index))

    current_index = index
    item = current_data[current_index]

    # escape markdown but keep our color formatting
    question = item['question'].replace("<", "&lt;").replace(">", "&gt;")

    # Add styled background with rounded corners
    colored_question = question.replace(
        "[MASKED_CITATION]",
        '<span style="background-color: #ffebeb; padding: 2px 6px; border-radius: 4px; color: blue;">[MASKED_CITATION]</span>'
    )

    # Format the citation dictionary with indentation
    formatted_citation = json.dumps(item["citation"], indent=4, ensure_ascii=False)
    return colored_question, formatted_citation, item["reference_id"], item['label_id'], index

def prev_item(difficulty):
    """Step back one item in the dataset for ``difficulty``.

    Rebinds the module-level ``current_data`` to that dataset, moves
    ``current_index`` one position backwards (wrapping from the first item to
    the last) and returns whatever ``update_display`` produces for the new
    position.
    """
    global current_index, current_data
    items = datasets[difficulty]
    current_data = items
    # Modulo keeps the position inside the dataset when stepping before item 0.
    target = (current_index - 1) % len(items)
    current_index = target
    return update_display(target, difficulty)

def next_item(difficulty):
    """Advance one item in the dataset for ``difficulty``.

    Rebinds the module-level ``current_data`` to that dataset, moves
    ``current_index`` one position forwards (wrapping from the last item to
    the first) and returns whatever ``update_display`` produces for the new
    position.
    """
    global current_index, current_data
    items = datasets[difficulty]
    current_data = items
    # Modulo wraps the position back to 0 after the final item.
    target = (current_index + 1) % len(items)
    current_index = target
    return update_display(target, difficulty)

# Create Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("""
    ## Data viewer for [SCALAR](https://arxiv.org/abs/2502.13753/)
    * SCALAR (Scientific Citation-based Live Assessment of Long-context Academic Reasoning) is a novel benchmark that leverages academic papers and their citation networks, featuring automatic generation of high-quality ground truth labels without human annotation, controllable difficulty levels, and a dynamic updating mechanism that prevents data contamination.
    * The benchmark provides a reliable and sustainable way to track progress in long-context understanding as LLM capabilities evolve.
    """)
    with gr.Column():
        with gr.Row():

            # Controls: difficulty selector, index slider and navigation buttons
            with gr.Column():

                # Add difficulty selector
                difficulty_dropdown = gr.Dropdown(
                    choices=list(difficulty_files.keys()),
                    value="Easy",
                    label="Difficulty Level"
                )

                # Slider range matches the default ("Easy") dataset; it is
                # resized by the dropdown handler below when difficulty changes.
                index_slider = gr.Slider(minimum=0, maximum=len(datasets["Easy"]) - 1, step=1, label="Select Index", value=0)
                with gr.Row():
                    prev_btn = gr.Button("Previous")
                    next_btn = gr.Button("Next")

            # Read-only details of the selected item
            with gr.Column():
                citation_text = gr.TextArea(label="Citation", interactive=False)
                reference_id = gr.Textbox(label="Reference ID", interactive=False)
                label_id = gr.Textbox(label="Label ID", interactive=False)

        # Full-width question text rendered below the controls
        with gr.Row():
            question_md = gr.Markdown(label="Question")

    # Bind events
    # Switching difficulty resizes the slider to the new dataset and resets it
    # to 0. The display refresh is chained explicitly with .then() rather than
    # left to the slider's own change event.
    # NOTE(review): that change event appears not to fire when the slider is
    # already at 0 (its value does not change), which left the previous
    # difficulty's item on screen - confirm against the installed Gradio version.
    difficulty_dropdown.change(
        fn=lambda d: gr.Slider(minimum=0, maximum=len(datasets[d]) - 1, step=1, value=0),
        inputs=[difficulty_dropdown],
        outputs=[index_slider]
    ).then(
        update_display,
        inputs=[index_slider, difficulty_dropdown],
        outputs=[question_md, citation_text, reference_id, label_id, index_slider]
    )

    # Moving the slider shows the item at the selected index
    index_slider.change(
        update_display,
        inputs=[index_slider, difficulty_dropdown],
        outputs=[question_md, citation_text, reference_id, label_id, index_slider]
    )

    # Previous/Next step through the dataset and write the new index back to the slider
    prev_btn.click(
        prev_item,
        inputs=[difficulty_dropdown],
        outputs=[question_md, citation_text, reference_id, label_id, index_slider]
    )

    next_btn.click(
        next_item,
        inputs=[difficulty_dropdown],
        outputs=[question_md, citation_text, reference_id, label_id, index_slider]
    )

    # Initialize display
    demo.load(
        update_display,
        inputs=[index_slider, difficulty_dropdown],
        outputs=[question_md, citation_text, reference_id, label_id, index_slider]
    )

# Launch interface
demo.launch()