"""Gradio application serving the ViDoRe leaderboard and submission guide."""

import gradio as gr

from app.utils import add_rank_and_format, filter_models, get_refresh_function
from data.model_handler import ModelHandler

METRICS = ["ndcg_at_5", "recall_at_1"]

# The first columns of the formatted table are "anchor" columns that are not
# datasets; every column after them is treated as one dataset.
NUM_ANCHOR_COLUMNS = 3


def main() -> None:
    """Build the leaderboard UI and launch the Gradio server.

    The table is loaded once for the initial metric to derive the column
    layout and the summary statistics, then the interactive widgets (metric
    dropdown, model search box, column checkboxes, refresh button) are wired
    to callbacks that recompute the displayed dataframe.
    """
    model_handler = ModelHandler()
    initial_metric = "ndcg_at_5"

    data = model_handler.get_vidore_data(initial_metric)
    data = add_rank_and_format(data)

    num_datasets = len(data.columns) - NUM_ANCHOR_COLUMNS
    num_models = len(data)
    num_scores = num_models * num_datasets

    # NOTE(review): "#markdown size" is not a CSS comment, so it is parsed as
    # part of the selector of the rule that follows it. It looks like a
    # `/* markdown size */` comment was intended; left untouched here because
    # changing it would alter the rendered font size.
    css = """
    table > thead {
        white-space: normal
    }

    table {
        --cell-width-1: 250px
    }

    table > tbody > tr > td:nth-child(2) > div {
        overflow-x: auto
    }

    .filter-checkbox-group {
        max-width: max-content;
    }

    #markdown size
    .markdown {
        font-size: 1rem;
    }
    """

    with gr.Blocks(css=css) as block:
        with gr.Tabs():
            with gr.TabItem("🏆 Leaderboard"):
                gr.Markdown("# ViDoRe: The Visual Document Retrieval Benchmark 📚🔍")
                gr.Markdown("## From the paper - ColPali: Efficient Document Retrieval with Vision Language Models 👀")

                gr.Markdown(
                    """
                    Visual Document Retrieval Benchmark leaderboard. To submit, refer to the corresponding tab.

                    Refer to the [ColPali paper](https://arxiv.org/abs/XXXX.XXXXX) for details on metrics, tasks and models.
                    """
                )

                anchor_columns = list(data.columns[:NUM_ANCHOR_COLUMNS])
                datasets_columns = list(data.columns[NUM_ANCHOR_COLUMNS:])

                with gr.Row():
                    metric_dropdown = gr.Dropdown(choices=METRICS, value=initial_metric, label="Select Metric")
                    research_textbox = gr.Textbox(
                        placeholder="🔍 Search Models... [press enter]",
                        label="Filter Models by Name",
                    )
                    # Only dataset columns are selectable, so the default
                    # selection must be drawn from the same list: the anchor
                    # columns are not valid choices and are always displayed.
                    column_checkboxes = gr.CheckboxGroup(
                        choices=datasets_columns,
                        value=datasets_columns,
                        label="Select Columns to Display",
                    )

                with gr.Row():
                    # One datatype per column: the second column is rendered
                    # as markdown, every other column as a number.
                    datatype = ["number", "markdown"] + ["number"] * (num_datasets + 1)
                    dataframe = gr.Dataframe(data, datatype=datatype, type="pandas")

                def update_data(metric, search_term, selected_columns):
                    """Recompute the table for the current widget values.

                    Args:
                        metric: Metric name chosen in the dropdown.
                        search_term: Text typed in the model search box.
                        selected_columns: Dataset columns ticked in the
                            checkbox group.

                    Returns:
                        The formatted and filtered table. When at least one
                        column is ticked, it is restricted to the anchor
                        columns plus the ticked columns, in the table's own
                        column order; with nothing ticked the full table is
                        returned.
                    """
                    table = add_rank_and_format(model_handler.get_vidore_data(metric))
                    table = filter_models(table, search_term)
                    if not selected_columns:
                        return table
                    # Always keep the anchor columns so the leading columns
                    # stay aligned with the positional `datatype` list, and
                    # silently ignore names that are absent from the table.
                    wanted = set(anchor_columns) | set(selected_columns)
                    return table[[column for column in table.columns if column in wanted]]

                with gr.Row():
                    refresh_button = gr.Button("Refresh")
                    refresh_button.click(
                        get_refresh_function(),
                        inputs=[metric_dropdown],
                        outputs=dataframe,
                        concurrency_limit=20,
                    )

                # Automatically refresh the dataframe when the dropdown value changes
                metric_dropdown.change(get_refresh_function(), inputs=[metric_dropdown], outputs=dataframe)

                # The search box and the column checkboxes share one callback
                # and the same three inputs.
                filter_inputs = [metric_dropdown, research_textbox, column_checkboxes]
                research_textbox.submit(update_data, inputs=filter_inputs, outputs=dataframe)
                column_checkboxes.change(update_data, inputs=filter_inputs, outputs=dataframe)

                gr.Markdown(
                    f"""
                    - **Total Datasets**: {num_datasets}
                    - **Total Scores**: {num_scores}
                    - **Total Models**: {num_models}
                    """
                    + r"""
                    Please consider citing:

                    ```bibtex
                    INSERT LATER
                    ```
                    """
                )

            with gr.TabItem("📚 Submit your model"):
                gr.Markdown("# How to Submit a New Model to the Leaderboard")
                gr.Markdown(
                    """
                    To submit a new model to the ViDoRe leaderboard, follow these steps:

                    1. **Evaluate your model**:
                        - You can either follow the evaluation script provided in the [ViDoRe GitHub repository](https://github.com/tonywu71/vidore-benchmark/)
                        - Use your own evaluation script.

                    2. **Format your submission file**:
                        - The submission file should be named `results.json`, and therefore in JSON format.
                        - It should have the following structure:
                        ```json
                        {
                            "dataset_name_1": {
                                "metric_1": score_1,
                                "metric_2": score_2,
                                ...
                            },
                            "dataset_name_2": {
                                "metric_1": score_1,
                                "metric_2": score_2,
                                ...
                            },
                        }
                        ```
                        - The dataset names should be the same as viDoRe dataset names listed in the following collection: [ViDoRe Benchmark](https://huggingface.co/collections/vidore/vidore-benchmark-667173f98e70a1c0fa4db00d).

                    3. **Submit your model**:
                        - Create a huggingface model repository with your model and the submission file.
                        - Add the tag 'vidore' to your model.

                    And you're done ! Your model will appear on the leaderboard once it is approved by the ViDoRe team.
                    """
                )

    block.queue(max_size=10).launch(debug=True)


if __name__ == "__main__":
    main()