import ast
import argparse
import glob
import pickle

import gradio as gr
import numpy as np
import pandas as pd

block_css = """
#notice_markdown {
    font-size: 104%
}
#notice_markdown th {
    display: none;
}
#notice_markdown td {
    padding-top: 6px;
    padding-bottom: 6px;
}
#leaderboard_markdown {
    font-size: 104%
}
#leaderboard_markdown td {
    padding-top: 6px;
    padding-bottom: 6px;
}
#leaderboard_dataframe td {
    line-height: 0.1em;
}
footer {
    display: none !important
}
.image-container {
    display: flex;
    align-items: center;
    padding: 1px;
}
.image-container img {
    margin: 0 30px;
    height: 20px;
    max-height: 100%;
    width: auto;
    max-width: 20%;
}
"""

def model_hyperlink(model_name, link):
    return f'<a target="_blank" href="{link}" style="color: var(--link-text-color); text-decoration: underline; text-decoration-style: dotted;">{model_name}</a>'
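
# Illustrative usage of model_hyperlink (hypothetical values, not real leaderboard
# entries): model_hyperlink("ExampleModel", "https://example.com") returns an HTML
# anchor string that the Markdown-typed "Name" column renders as a clickable link.
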
def load_leaderboard_table_csv(filename, add_hyperlink=True):
    # Parse a leaderboard CSV into a list of dicts keyed by the header row.
    with open(filename) as f:
        lines = f.readlines()
    heads = [v.strip() for v in lines[0].split(",")]
    rows = []
    for i in range(1, len(lines)):
        row = [v.strip() for v in lines[i].split(",")]
        item = {}
        for h, v in zip(heads, row):
            # Score columns hold integers; the remaining columns stay as strings.
            if h not in ("Model", "Link", "Language Model", "Open Source"):
                item[h] = int(v)
            else:
                item[h] = v
        if add_hyperlink:
            item["Model"] = model_hyperlink(item["Model"], item["Link"])
        rows.append(item)
    return rows

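# A minimal sketch of the CSV layout load_leaderboard_table_csv expects. The column
# names match the ones used in the tables below; the example row is a made-up
# illustration, not a real leaderboard entry:
#
#   Model,Link,Language Model,Open Source,Text Recognition,Scene Text-Centric VQA,Doc-Oriented VQA,KIE,HMER,Final Score
#   ExampleModel,https://example.com,ExampleLM-7B,Yes,100,100,100,100,100,500
#
# Every column other than Model, Link, Language Model and Open Source is parsed with
# int(), so score cells must hold plain integers.
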
def get_arena_table(model_table_df):
    # Sort by the aggregate score so rank 1 is the best-performing model.
    model_table_df = model_table_df.sort_values(by=["Final Score"], ascending=False)
    values = []
    for i in range(len(model_table_df)):
        # Use positional access (.iloc) so each rank stays aligned with its own row
        # after sorting.
        record = model_table_df.iloc[i]
        row = [
            i + 1,                        # rank
            record["Model"],              # model display name (already hyperlinked)
            record["Language Model"],
            record["Open Source"],
            record["Text Recognition"],
            record["Scene Text-Centric VQA"],
            record["Doc-Oriented VQA"],
            record["KIE"],
            record["HMER"],
            record["Final Score"],
        ]
        values.append(row)
    return values

def get_recog_table(model_table_df):
    # Rows keep their CSV order; the rank is simply the row position.
    values = []
    for i in range(len(model_table_df)):
        record = model_table_df.iloc[i]
        row = [
            i + 1,                        # rank
            record["Model"],              # model display name (already hyperlinked)
            record["Language Model"],
            record["Open Source"],
            record["Regular Text"],
            record["Irregular Text"],
            record["Artistic Text"],
            record["Handwriting"],
            record["Digit string"],
            record["Non-semantic Text"],
            record["ALL"],
        ]
        values.append(row)
    return values

def build_leaderboard_tab(leaderboard_table_file, text_recog_file, Inaccessible_model_file, show_plot=False):
    if leaderboard_table_file:
        data = load_leaderboard_table_csv(leaderboard_table_file)
        data_recog = load_leaderboard_table_csv(text_recog_file)
        data_Inaccessible = load_leaderboard_table_csv(Inaccessible_model_file)
        model_table_df = pd.DataFrame(data)
        model_table_df_Inaccessible = pd.DataFrame(data_Inaccessible)
        recog_table_df = pd.DataFrame(data_recog)

        md_head = """
# 🏆 OCRBench Leaderboard
| [GitHub](https://github.com/Yuliang-Liu/MultimodalOCR) | [Paper](https://arxiv.org/abs/2305.07895) |
"""
        gr.Markdown(md_head, elem_id="leaderboard_markdown")
        with gr.Tabs() as tabs:
            # OCRBench overall table
            with gr.Tab("OCRBench", id=0):
                arena_table_vals = get_arena_table(model_table_df)
                md = "OCRBench is a comprehensive evaluation benchmark designed to assess the OCR capabilities of Large Multimodal Models. It comprises five components: Text Recognition, Scene Text-Centric VQA, Document-Oriented VQA, Key Information Extraction, and Handwritten Mathematical Expression Recognition. The benchmark includes 1000 question-answer pairs, and all the answers undergo manual verification and correction to ensure a more precise evaluation."
                gr.Markdown(md, elem_id="leaderboard_markdown")
                gr.Dataframe(
                    headers=[
                        "Rank",
                        "Name",
                        "Language Model",
                        "Open Source",
                        "Text Recognition",
                        "Scene Text-Centric VQA",
                        "Doc-Oriented VQA",
                        "KIE",
                        "HMER",
                        "Final Score",
                    ],
                    datatype=[
                        "str",
                        "markdown",
                        "str",
                        "str",
                        "number",
                        "number",
                        "number",
                        "number",
                        "number",
                        "number",
                    ],
                    value=arena_table_vals,
                    elem_id="arena_leaderboard_dataframe",
                    height=700,
                    column_widths=[60, 120, 150, 100, 150, 200, 180, 80, 80, 160],
                    wrap=True,
                )
            # Text recognition breakdown table
            with gr.Tab("Text Recognition", id=1):
                arena_table_vals = get_recog_table(recog_table_df)
                md = "OCRBench is a comprehensive evaluation benchmark designed to assess the OCR capabilities of Large Multimodal Models. It comprises five components: Text Recognition, Scene Text-Centric VQA, Document-Oriented VQA, Key Information Extraction, and Handwritten Mathematical Expression Recognition. The benchmark includes 1000 question-answer pairs, and all the answers undergo manual verification and correction to ensure a more precise evaluation."
                gr.Markdown(md, elem_id="leaderboard_markdown")
                gr.Dataframe(
                    headers=[
                        "Rank",
                        "Name",
                        "Language Model",
                        "Open Source",
                        "Regular Text",
                        "Irregular Text",
                        "Artistic Text",
                        "Handwriting",
                        "Digit string",
                        "Non-semantic Text",
                        "ALL",
                    ],
                    datatype=[
                        "str",
                        "markdown",
                        "str",
                        "str",
                        "number",
                        "number",
                        "number",
                        "number",
                        "number",
                        "number",
                        "number",
                    ],
                    value=arena_table_vals,
                    elem_id="arena_leaderboard_dataframe",
                    height=700,
                    column_widths=[60, 120, 150, 100, 100, 100, 100, 100, 100, 100, 80],
                    wrap=True,
                )
            # Models without public weights or an API
            with gr.Tab("Inaccessible Model", id=2):
                arena_table_vals = get_arena_table(model_table_df_Inaccessible)
                md = "The models on this list are neither open-source nor accessible through an API."
                gr.Markdown(md, elem_id="leaderboard_markdown")
                gr.Dataframe(
                    headers=[
                        "Rank",
                        "Name",
                        "Language Model",
                        "Open Source",
                        "Text Recognition",
                        "Scene Text-Centric VQA",
                        "Doc-Oriented VQA",
                        "KIE",
                        "HMER",
                        "Final Score",
                    ],
                    datatype=[
                        "str",
                        "markdown",
                        "str",
                        "str",
                        "number",
                        "number",
                        "number",
                        "number",
                        "number",
                        "number",
                    ],
                    value=arena_table_vals,
                    elem_id="arena_leaderboard_dataframe",
                    height=700,
                    column_widths=[60, 120, 150, 100, 150, 200, 180, 80, 80, 160],
                    wrap=True,
                )

    md_tail = """
# Notice
To reduce false positives, we filter out questions whose answers contain fewer than 4 symbols from all datasets. API calls to closed-source models sometimes fail; for such samples we repeat the call until a successful response can no longer be obtained. Note that, due to OpenAI's rigorous security reviews, GPT4V refuses to provide results for 84 of the samples in OCRBench.

If you would like to include your model in the OCRBench leaderboard, please follow the evaluation instructions provided on [GitHub](https://github.com/Yuliang-Liu/MultimodalOCR), [VLMEvalKit](https://github.com/open-compass/VLMEvalKit) or [lmms-eval](https://github.com/EvolvingLMMs-Lab/lmms-eval), and feel free to contact us via email at [email protected]. We will update the leaderboard promptly."""
    gr.Markdown(md_tail, elem_id="leaderboard_markdown")

def build_demo(leaderboard_table_file, recog_table_file, Inaccessible_model_file):
    text_size = gr.themes.sizes.text_lg
    with gr.Blocks(
        title="OCRBench Leaderboard",
        theme=gr.themes.Base(text_size=text_size),
        css=block_css,
    ) as demo:
        leader_components = build_leaderboard_tab(
            leaderboard_table_file, recog_table_file, Inaccessible_model_file, show_plot=True
        )
    return demo

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--share", action="store_true")
    parser.add_argument("--OCRBench_file", type=str, default="./OCRBench.csv")
    parser.add_argument("--TextRecognition_file", type=str, default="./TextRecognition.csv")
    parser.add_argument("--Inaccessible_model_file", type=str, default="./Inaccessible_model.csv")
    args = parser.parse_args()

    demo = build_demo(args.OCRBench_file, args.TextRecognition_file, args.Inaccessible_model_file)
    # Honor --share so a public Gradio link is created when requested.
    demo.launch(share=args.share)
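
# Example invocation (assuming this file is saved as app.py; the CSV paths shown are
# the argparse defaults above):
#   python app.py
#   python app.py --OCRBench_file ./OCRBench.csv --TextRecognition_file ./TextRecognition.csv --share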
