import json import os import gradio as gr # Dictionary to store data for each difficulty datasets = {} difficulty_files = { "Easy": "data/scalar_easy.json", "Medium": "data/scalar_medium.json", "Hard": "data/scalar_hard.json" } # Load all json files for difficulty, filepath in difficulty_files.items(): with open(filepath) as f: datasets[difficulty] = json.load(f) # Current state variables current_index = 0 current_difficulty = "Easy" # default difficulty current_data = datasets[current_difficulty] # Update display function with difficulty parameter def update_display(index, difficulty=None): global current_index, current_difficulty, current_data # Update difficulty if provided if difficulty is not None and difficulty != current_difficulty: current_difficulty = difficulty current_data = datasets[difficulty] current_index = 0 index = 0 current_index = index item = current_data[current_index] # escape markdown but keep our color formatting question = item['question'].replace("<", "<").replace(">", ">") # Add styled background with rounded corners colored_question = question.replace( "[MASKED_CITATION]", '[MASKED_CITATION]' ) # Format the citation dictionary with indentation formatted_citation = json.dumps(item["citation"], indent=4, ensure_ascii=False) return colored_question, formatted_citation, item["reference_id"], item['label_id'], index def prev_item(difficulty): global current_index, current_data current_data = datasets[difficulty] current_index = (current_index - 1) % len(current_data) return update_display(current_index, difficulty) def next_item(difficulty): global current_index, current_data current_data = datasets[difficulty] current_index = (current_index + 1) % len(current_data) return update_display(current_index, difficulty) # Create Gradio interface with gr.Blocks() as demo: gr.Markdown(""" ## Data viewer for [SCALAR](https://arxiv.org/abs/2502.13753/) * SCALAR (Scientific Citation-based Live Assessment of Long-context Academic Reasoning) is a novel benchmark that leverages academic papers and their citation networks, featuring automatic generation of high-quality ground truth labels without human annotation, controllable difficulty levels, and a dynamic updating mechanism that prevents data contamination. * The benchmark provides a reliable and sustainable way to track progress in long-context understanding as LLM capabilities evolve. """) with gr.Column(): with gr.Row(): # Display current index with gr.Column(): # Add difficulty selector difficulty_dropdown = gr.Dropdown( choices=list(difficulty_files.keys()), value="Easy", label="Difficulty Level" ) index_slider = gr.Slider(minimum=0, maximum=len(datasets["Easy"]) - 1, step=1, label="Select Index", value=0) with gr.Row(): prev_btn = gr.Button("Previous") next_btn = gr.Button("Next") # Display question and citation with gr.Column(): citation_text = gr.TextArea(label="Citation", interactive=False) reference_id = gr.Textbox(label="Reference ID", interactive=False) label_id = gr.Textbox(label="Label ID", interactive=False) with gr.Row(): question_md = gr.Markdown(label="Question") # Bind events difficulty_dropdown.change( fn=lambda d: gr.Slider(minimum=0, maximum=len(datasets[d]) - 1, step=1, value=0), inputs=[difficulty_dropdown], outputs=[index_slider] ) index_slider.change( update_display, inputs=[index_slider, difficulty_dropdown], outputs=[question_md, citation_text, reference_id, label_id, index_slider] ) prev_btn.click( prev_item, inputs=[difficulty_dropdown], outputs=[question_md, citation_text, reference_id, label_id, index_slider] ) next_btn.click( next_item, inputs=[difficulty_dropdown], outputs=[question_md, citation_text, reference_id, label_id, index_slider] ) # Initialize display demo.load( update_display, inputs=[index_slider, difficulty_dropdown], outputs=[question_md, citation_text, reference_id, label_id, index_slider] ) # Launch interface demo.launch()