SCALAR / app.py
gentleman12138's picture
Add large JSON files using Git LFS
c6ad631
import html
import json
import os

import gradio as gr
# Parsed records for every difficulty level, keyed by difficulty name.
datasets = {}

# Difficulty name -> path of the JSON file holding that level's records.
difficulty_files = {
    "Easy": "data/scalar_easy.json",
    "Medium": "data/scalar_medium.json",
    "Hard": "data/scalar_hard.json",
}

# Load all json files
for difficulty, filepath in difficulty_files.items():
    # Decode explicitly as UTF-8: without `encoding`, open() falls back to
    # the platform's locale encoding, which can fail or mis-decode
    # non-ASCII text in the records.
    with open(filepath, encoding="utf-8") as f:
        datasets[difficulty] = json.load(f)

# Current state variables (module-level; read and rebound by the callbacks)
current_index = 0
current_difficulty = "Easy"  # default difficulty
current_data = datasets[current_difficulty]
# Update display function with difficulty parameter
def update_display(index, difficulty=None):
    """Build the widget values for the item at ``index``.

    The selection is also recorded in the module-level ``current_index``,
    ``current_difficulty`` and ``current_data`` variables.

    NOTE(review): because that state is module-level, it is presumably
    shared by every session served by this process -- confirm whether
    per-session state is needed.

    Args:
        index: Position of the item in the active dataset. Float values
            are truncated to ``int`` before indexing.
        difficulty: Key into ``datasets``. When given and different from
            ``current_difficulty``, the active dataset is switched and the
            selection restarts at item 0 regardless of ``index``.

    Returns:
        A 5-tuple ``(question, citation, reference_id, label_id, index)``:
        the HTML-escaped question with every ``[MASKED_CITATION]`` marker
        wrapped in a highlighted ``<span>``, the item's citation as
        indented JSON text, the item's ``reference_id`` and ``label_id``,
        and the index that was actually displayed.

    Raises:
        KeyError: If ``difficulty`` is not a key of ``datasets`` or the
            item lacks one of the expected fields.
        IndexError: If ``index`` is outside the active dataset.
    """
    global current_index, current_difficulty, current_data

    # Switching difficulty swaps the active dataset and restarts at item 0.
    if difficulty is not None and difficulty != current_difficulty:
        current_difficulty = difficulty
        current_data = datasets[difficulty]
        index = 0

    # A float index cannot be used to index a list, so coerce it first.
    index = int(index)
    current_index = index
    item = current_data[current_index]

    # Escape "&" as well as "<" / ">" so that question text is always shown
    # literally; quotes are left alone because the text is never placed
    # inside an attribute value.
    question = html.escape(item["question"], quote=False)

    # Highlight the masked citation marker (added after escaping so the
    # <span> markup itself is not escaped).
    colored_question = question.replace(
        "[MASKED_CITATION]",
        '<span style="background-color: #ffebeb; padding: 2px 6px; border-radius: 4px; color: blue;">[MASKED_CITATION]</span>',
    )

    # Format the citation dictionary with indentation
    formatted_citation = json.dumps(item["citation"], indent=4, ensure_ascii=False)

    return colored_question, formatted_citation, item["reference_id"], item["label_id"], index
def prev_item(difficulty):
    """Step back one item in the ``difficulty`` dataset and render it.

    Makes ``datasets[difficulty]`` the active dataset, moves the
    module-level ``current_index`` one position back (wrapping from the
    first item to the last), and returns whatever ``update_display``
    produces for that position.
    """
    global current_index, current_data

    active = datasets[difficulty]
    current_data = active
    # The modulo keeps the result non-negative, so stepping back from 0
    # lands on the last item instead of -1.
    current_index = (current_index - 1) % len(active)
    return update_display(current_index, difficulty)
def next_item(difficulty):
    """Advance one item in the ``difficulty`` dataset and render it.

    Makes ``datasets[difficulty]`` the active dataset, moves the
    module-level ``current_index`` one position forward (wrapping from the
    last item back to the first), and returns whatever ``update_display``
    produces for that position.
    """
    global current_index, current_data

    current_data = datasets[difficulty]
    item_count = len(current_data)
    # Wrap around once the end of the dataset is passed.
    current_index = (current_index + 1) % item_count
    return update_display(index=current_index, difficulty=difficulty)
# Create Gradio interface
# NOTE(review): the original indentation of this section was not available;
# the nesting of the layout containers below is reconstructed from the
# comments and widget order -- confirm against the rendered layout.
with gr.Blocks() as demo:
    gr.Markdown("""
## Data viewer for [SCALAR](https://arxiv.org/abs/2502.13753/)
* SCALAR (Scientific Citation-based Live Assessment of Long-context Academic Reasoning) is a novel benchmark that leverages academic papers and their citation networks, featuring automatic generation of high-quality ground truth labels without human annotation, controllable difficulty levels, and a dynamic updating mechanism that prevents data contamination.
* The benchmark provides a reliable and sustainable way to track progress in long-context understanding as LLM capabilities evolve.
""")
    with gr.Column():
        with gr.Row():
            # Navigation controls: difficulty, index slider, prev/next
            with gr.Column():
                # Add difficulty selector
                difficulty_dropdown = gr.Dropdown(
                    choices=list(difficulty_files.keys()),
                    value="Easy",
                    label="Difficulty Level",
                )
                index_slider = gr.Slider(
                    minimum=0,
                    maximum=len(datasets["Easy"]) - 1,
                    step=1,
                    label="Select Index",
                    value=0,
                )
                with gr.Row():
                    prev_btn = gr.Button("Previous")
                    next_btn = gr.Button("Next")
            # Display citation metadata
            with gr.Column():
                citation_text = gr.TextArea(label="Citation", interactive=False)
                reference_id = gr.Textbox(label="Reference ID", interactive=False)
                label_id = gr.Textbox(label="Label ID", interactive=False)
        with gr.Row():
            question_md = gr.Markdown(label="Question")

    # Every display-refreshing callback fills the same widgets, in the order
    # of the tuple returned by update_display.
    display_outputs = [question_md, citation_text, reference_id, label_id, index_slider]

    # Bind events
    # Changing the difficulty resizes the slider to the new dataset and puts
    # it back at 0. The slider's own `change` event only fires when its value
    # actually changes, so when it was already at 0 the panels would keep
    # showing the previous difficulty's item; the chained step refreshes the
    # display explicitly in every case.
    difficulty_dropdown.change(
        fn=lambda d: gr.Slider(minimum=0, maximum=len(datasets[d]) - 1, step=1, value=0),
        inputs=[difficulty_dropdown],
        outputs=[index_slider],
    ).then(
        update_display,
        inputs=[index_slider, difficulty_dropdown],
        outputs=display_outputs,
    )
    index_slider.change(
        update_display,
        inputs=[index_slider, difficulty_dropdown],
        outputs=display_outputs,
    )
    prev_btn.click(
        prev_item,
        inputs=[difficulty_dropdown],
        outputs=display_outputs,
    )
    next_btn.click(
        next_item,
        inputs=[difficulty_dropdown],
        outputs=display_outputs,
    )

    # Initialize display
    demo.load(
        update_display,
        inputs=[index_slider, difficulty_dropdown],
        outputs=display_outputs,
    )

# Launch interface
demo.launch()