shopping_mmlu_leaderboard / OpenVLM_subset.json
KL4805's picture
Upload OpenVLM_subset.json
b2cdec4 verified
{
"time": "241031154353",
"results": {
"GPT-4o (0513, detail-high)": {
"META": {
"Method": [
"GPT-4o (0513, detail-high)",
"https://openai.com/index/hello-gpt-4o/"
],
"Parameters": "",
"Language Model": "",
"Vision Model": "",
"Org": "OpenAI",
"Time": "2024/05/31",
"Verified": "Yes",
"OpenSource": "No",
"key": 270,
"dir_name": "GPT4o_HIGH"
},
"SEEDBench_IMG": {
"Overall": 77.1,
"Instance Attributes": 79.3,
"Instance Identity": 81.0,
"Instance Interaction": 80.4,
"Instance Location": 72.9,
"Instances Counting": 69.5,
"Scene Understanding": 80.1,
"Spatial Relation": 67.9,
"Text Understanding": 72.6,
"Visual Reasoning": 83.1,
"Overall (official)": "N/A"
},
"CCBench": {
"Overall": 71.2,
"Sketch Reasoning": 91.1,
"Historical Figure": 37.1,
"Calligraphy Painting": 70.2,
"Scenery Building": 89.5,
"Food Clothes": 62.6,
"Cultural Relic": 67.0,
"Traditional Show": 71.2
},
"MMBench_TEST_EN": {
"Overall": 83.4,
"CP": 87.4,
"FP-S": 78.9,
"FP-C": 83.8,
"AR": 86.5,
"LR": 80.3,
"RR": 80.6
},
"MMBench_TEST_CN": {
"Overall": 82.1,
"CP": 87.6,
"FP-S": 76.6,
"FP-C": 83.4,
"AR": 83.7,
"LR": 78.0,
"RR": 80.1
},
"MMBench_TEST_EN_V11": {
"Overall": 83.0,
"AR": 90.2,
"CP": 81.3,
"FP-C": 86.1,
"FP-S": 81.4,
"LR": 78.8,
"RR": 82.2,
"Action Recognition": 93.2,
"Attribute Comparison": 82.7,
"Attribute Recognition": 91.0,
"Celebrity Recognition": 62.6,
"Function Reasoning": 93.3,
"Future Prediction": 82.7,
"Identity Reasoning": 98.7,
"Image Emotion": 81.1,
"Image Quality": 59.7,
"Image Scene": 88.2,
"Image Style": 83.7,
"Image Topic": 97.8,
"Nature Relation": 92.4,
"Object Localization": 84.8,
"Ocr": 98.9,
"Physical Property Reasoning": 78.5,
"Physical Relation": 61.3,
"Social Relation": 89.0,
"Spatial Relationship": 78.7,
"Structuralized Imagetext Understanding": 76.1
},
"MMBench_TEST_CN_V11": {
"Overall": 81.5,
"AR": 86.5,
"CP": 81.5,
"FP-C": 85.0,
"FP-S": 79.1,
"LR": 77.2,
"RR": 79.8,
"Action Recognition": 94.0,
"Attribute Comparison": 81.3,
"Attribute Recognition": 91.0,
"Celebrity Recognition": 57.8,
"Function Reasoning": 94.4,
"Future Prediction": 82.7,
"Identity Reasoning": 97.4,
"Image Emotion": 85.6,
"Image Quality": 58.9,
"Image Scene": 88.2,
"Image Style": 80.4,
"Image Topic": 98.9,
"Nature Relation": 94.6,
"Object Localization": 82.9,
"Ocr": 97.8,
"Physical Property Reasoning": 67.1,
"Physical Relation": 53.3,
"Social Relation": 86.8,
"Spatial Relationship": 74.7,
"Structuralized Imagetext Understanding": 73.4
},
"MME": {
"Overall": 2310.3,
"Perception": 1614.2,
"Cognition": 696.1,
"OCR": 192.5,
"Artwork": 145.2,
"Celebrity": 67.9,
"Code Reasoning": 177.5,
"Color": 185.0,
"Commonsense Reasoning": 178.6,
"Count": 185.0,
"Existence": 185.0,
"Landmark": 182.0,
"Numerical Calculation": 147.5,
"Position": 133.3,
"Posters": 191.2,
"Scene": 147.0,
"Text Translation": 192.5
},
"MMVet": {
"Rec": 67.8,
"Ocr": 76.8,
"Know": 58.3,
"Gen": 56.9,
"Spat": 74.3,
"Math": 76.2,
"Overall": 69.1,
"Overall (official)": "N/A"
},
"MMMU_VAL": {
"Overall": 69.2,
"Art & Design": 72.5,
"Business": 73.3,
"Science": 64.7,
"Health & Medicine": 74.0,
"Humanities & Social Science": 80.8,
"Tech & Engineering": 57.6
},
"MathVista": {
"Overall": 61.3,
"SCI": 64.8,
"TQA": 70.3,
"NUM": 44.4,
"ARI": 58.4,
"VQA": 47.5,
"GEO": 61.5,
"ALG": 62.3,
"GPS": 60.1,
"MWP": 69.9,
"LOG": 43.2,
"FQA": 60.2,
"STA": 68.4
},
"HallusionBench": {
"aAcc": 70.2,
"fAcc": 49.1,
"qAcc": 45.5,
"Overall": 55.0
},
"LLaVABench": {
"Overall": 102.0,
"Conv": 93.6,
"Complex": 111.2,
"Detail": 93.6,
"Overall (official)": "N/A"
},
"AI2D": {
"Overall": 84.6,
"atomStructure": 75.0,
"eclipses": 90.3,
"faultsEarthquakes": 78.6,
"foodChainsWebs": 92.2,
"lifeCycles": 83.5,
"moonPhaseEquinox": 68.2,
"partsOfA": 80.9,
"partsOfTheEarth": 82.7,
"photosynthesisRespiration": 83.5,
"rockCycle": 73.1,
"rockStrata": 87.8,
"solarSystem": 97.2,
"typesOf": 81.0,
"volcano": 100.0,
"waterCNPCycle": 68.2
},
"ScienceQA_VAL": {
"Overall": 89.7,
"Adaptations": 97.9,
"Adaptations and natural selection": 100.0,
"Age of Exploration": 100.0,
"Ancient Egypt and Kush": 100.0,
"Ancient Mesopotamia": 100.0,
"Animals": 100.0,
"Astronomy": 100.0,
"Atoms and molecules": 100.0,
"Basic economic principles": 32.8,
"Chemical reactions": 100.0,
"Cities": 87.5,
"Classification": 98.8,
"Classification and scientific names": 100.0,
"Climate change": 100.0,
"Colonial America": 90.5,
"Context clues": 100.0,
"Descriptive details": 100.0,
"Designing experiments": 100.0,
"Domain-specific vocabulary": 60.0,
"Early 19th century American history": 100.0,
"Early Americas": 50.0,
"Earth events": 100.0,
"Ecological interactions": 76.0,
"Ecosystems": 95.5,
"Engineering practices": 100.0,
"English colonies in North America": 74.4,
"Force and motion": 84.0,
"Fossils": 82.4,
"Genes to traits": 83.0,
"Geography": 98.6,
"Government": 100.0,
"Independent reading comprehension": 100.0,
"Informational texts: level 1": 100.0,
"Magnets": 72.2,
"Maps": 96.8,
"Materials": 96.6,
"Medieval Asia": 100.0,
"Natural resources and human impacts": 100.0,
"Oceania: geography": 59.6,
"Oceans and continents": 100.0,
"Oceans and continents\t": 100.0,
"Particle motion and energy": 92.6,
"Persuasive strategies": 100.0,
"Physical Geography": 83.7,
"Plant reproduction": 90.0,
"Plants": 100.0,
"Plate tectonics": 100.0,
"Read-alone texts": 100.0,
"Rocks and minerals": 100.0,
"Rome and the Byzantine Empire": 100.0,
"Scientific names": 100.0,
"Solutions": 65.7,
"State capitals": 100.0,
"States": 100.0,
"States of matter": 97.4,
"The American Revolution": 100.0,
"The Americas: geography": 83.3,
"The Antebellum period": 100.0,
"The Civil War and Reconstruction": 100.0,
"The Silk Road": 100.0,
"Thermal energy": 100.0,
"Velocity, acceleration, and forces": 68.6,
"Visual elements": 100.0,
"Water cycle": 100.0,
"Weather and climate": 90.6,
"World religions": 100.0
},
"ScienceQA_TEST": {
"Overall": 90.7,
"Adaptations": 100.0,
"Ancient Egypt and Kush": 100.0,
"Ancient Mesopotamia": 100.0,
"Animals": 100.0,
"Astronomy": 100.0,
"Atoms and molecules": 100.0,
"Basic economic principles": 38.0,
"Cells": 100.0,
"Chemical reactions": 100.0,
"Cities": 91.7,
"Classification": 100.0,
"Classification and scientific names": 100.0,
"Climate change": 100.0,
"Colonial America": 81.6,
"Context clues": 100.0,
"Descriptive details": 100.0,
"Designing experiments": 100.0,
"Domain-specific vocabulary": 100.0,
"Early 19th century American history": 100.0,
"Earth events": 100.0,
"Ecological interactions": 66.7,
"Ecosystems": 90.4,
"Engineering practices": 98.2,
"English colonies in North America": 92.3,
"Force and motion": 100.0,
"Fossils": 100.0,
"Genes to traits": 76.3,
"Geography": 95.2,
"Government": 100.0,
"Greece": 100.0,
"Independent reading comprehension": 100.0,
"Informational texts: level 1": 100.0,
"Kinetic and potential energy": 100.0,
"Magnets": 77.3,
"Maps": 97.8,
"Materials": 96.5,
"Medieval Asia": 100.0,
"Oceania: geography": 76.5,
"Oceans and continents": 100.0,
"Oceans and continents\t": 100.0,
"Particle motion and energy": 97.6,
"Persuasive strategies": 100.0,
"Photosynthesis": 100.0,
"Physical Geography": 92.2,
"Plant reproduction": 100.0,
"Plants": 66.7,
"Plate tectonics": 100.0,
"Read-alone texts": 100.0,
"Rocks and minerals": 100.0,
"Scientific names": 100.0,
"Solutions": 72.2,
"State capitals": 100.0,
"States": 94.4,
"States of matter": 100.0,
"The American Revolution": 100.0,
"The Americas: geography": 71.1,
"The Antebellum period": 100.0,
"The Civil War and Reconstruction": 100.0,
"Thermal energy": 95.5,
"Topographic maps": 100.0,
"Velocity, acceleration, and forces": 67.7,
"Visual elements": 100.0,
"Water cycle": 100.0,
"Weather and climate": 91.4,
"World religions": 100.0
},
"OCRBench": {
"Text Recognition": 199,
"Scene Text-centric VQA": 181,
"Doc-oriented VQA": 168,
"Key Information Extraction": 170,
"Handwritten Mathematical Expression Recognition": 18,
"Final Score": 736
},
"MMStar": {
"Overall": 63.9,
"coarse perception": 73.6,
"fine-grained perception": 54.8,
"instance reasoning": 66.4,
"logical reasoning": 72.0,
"math": 66.4,
"science & technology": 50.0
},
"RealWorldQA": {
"Overall": 75.4
},
"POPE": {
"Overall": 85.6,
"acc": 86.7,
"precision": 93.0,
"recall": 79.3
},
"SEEDBench2_Plus": {
"Overall": 72.0,
"chart": 71.4,
"map": 62.0,
"web": 85.2
},
"MMT-Bench_VAL": {
"Overall": 67.3,
"VR": 85.3,
"Loc": 68.1,
"OCR": 82.5,
"Count": 57.2,
"HLN": 75.0,
"IR": 85.0,
"3D": 57.5,
"VC": 87.9,
"VG": 46.2,
"DU": 72.9,
"AR": 51.0,
"PLP": 43.5,
"I2IT": 50.0,
"RR": 76.2,
"IQT": 15.0,
"Emo": 58.3,
"VI": 33.9,
"MemU": 87.5,
"VPU": 84.9,
"AND": 57.0,
"KD": 57.1,
"VCR": 80.0,
"IEJ": 40.0,
"MIA": 42.5,
"CIM": 61.7,
"TU": 49.5,
"VP": 66.7,
"MedU": 74.0,
"AUD": 58.0,
"DKR": 64.6,
"EA": 90.0,
"GN": 46.2,
"abstract_visual_recognition": 85.0,
"action_quality_assessment": 15.0,
"age_gender_race_recognition": 60.0,
"anatomy_identification": 75.0,
"animal_keypoint_detection": 35.0,
"animals_recognition": 100.0,
"animated_character_recognition": 90.0,
"art_design": 81.8,
"artwork_emotion_recognition": 55.0,
"astronomical_recognition": 100.0,
"attribute_hallucination": 80.0,
"behavior_anomaly_detection": 30.0,
"body_emotion_recognition": 40.0,
"building_recognition": 90.0,
"business": 66.7,
"camouflage_object_detection": 55.0,
"celebrity_recognition": 0.0,
"chart_to_table": 95.0,
"chart_to_text": 90.0,
"chart_vqa": 70.0,
"chemical_apparatusn_recognition": 80.0,
"clock_reading": 30.0,
"clothes_keypoint_detection": 70.0,
"color_assimilation": 35.0,
"color_constancy": 14.3,
"color_contrast": 40.0,
"color_recognition": 95.0,
"counting_by_category": 33.8,
"counting_by_reasoning": 95.0,
"counting_by_visual_prompting": 50.0,
"crowd_counting": 50.0,
"deepfake_detection": 60.0,
"depth_estimation": 40.0,
"disaster_recognition": 85.0,
"disease_diagnose": 60.0,
"doc_vqa": 80.0,
"electronic_object_recognition": 100.0,
"eqn2latex": 90.0,
"exist_hallucination": 90.0,
"facail_expression_change_recognition": 95.0,
"face_detection": 90.0,
"face_mask_anomaly_dectection": 70.0,
"face_retrieval": 100.0,
"facial_expression_recognition": 75.0,
"fashion_recognition": 75.0,
"film_and_television_recognition": 95.0,
"font_recognition": 50.0,
"food_recognition": 100.0,
"furniture_keypoint_detection": 55.0,
"gaze_estimation": 10.0,
"general_action_recognition": 95.0,
"geometrical_perspective": 50.0,
"geometrical_relativity": 30.0,
"gesture_recognition": 65.0,
"google_apps": 50.0,
"gui_general": 45.0,
"gui_install": 50.0,
"handwritten_mathematical_expression_recognition": 90.0,
"handwritten_retrieval": 90.0,
"handwritten_text_recognition": 100.0,
"health_medicine": 92.9,
"helmet_anomaly_detection": 90.0,
"human_interaction_understanding": 95.0,
"human_keypoint_detection": 70.0,
"human_object_interaction_recognition": 75.0,
"humanitites_social_science": 54.5,
"image2image_retrieval": 75.0,
"image_based_action_recognition": 95.0,
"image_captioning": 100.0,
"image_captioning_paragraph": 95.0,
"image_colorization": 60.0,
"image_dense_captioning": 68.4,
"image_matting": 15.0,
"image_quality_assessment": 35.0,
"image_season_recognition": 80.0,
"industrial_produce_anomaly_detection": 40.0,
"instance_captioning": 95.0,
"interactive_segmentation": 85.7,
"jigsaw_puzzle_solving": 40.0,
"landmark_recognition": 100.0,
"lesion_grading": 90.0,
"logo_and_brand_recognition": 95.0,
"lvlm_response_judgement": 45.0,
"medical_modality_recognition": 100.0,
"meme_image_understanding": 95.0,
"meme_vedio_understanding": 80.0,
"mevis": 30.0,
"micro_expression_recognition": 20.0,
"multiple_image_captioning": 95.0,
"multiple_instance_captioning": 95.0,
"multiple_view_image_understanding": 10.0,
"muscial_instrument_recognition": 95.0,
"national_flag_recognition": 100.0,
"navigation": 90.0,
"next_img_prediction": 65.0,
"object_detection": 90.0,
"one_shot_detection": 85.0,
"order_hallucination": 50.0,
"other_biological_attributes": 45.0,
"painting_recognition": 90.0,
"person_reid": 95.0,
"pixel_localization": 25.0,
"pixel_recognition": 55.0,
"plant_recognition": 90.0,
"point_tracking": 35.0,
"polygon_localization": 40.0,
"profession_recognition": 90.0,
"ravens_progressive_matrices": 15.0,
"reason_seg": 47.4,
"referring_detection": 45.0,
"relation_hallucination": 80.0,
"religious_recognition": 75.0,
"remote_sensing_object_detection": 60.0,
"rock_recognition": 80.0,
"rotated_object_detection": 77.8,
"salient_object_detection_rgb": 55.0,
"salient_object_detection_rgbd": 50.0,
"scene_emotion_recognition": 65.0,
"scene_graph_recognition": 85.0,
"scene_recognition": 65.0,
"scene_text_recognition": 90.0,
"science": 58.3,
"screenshot2code": 60.0,
"sculpture_recognition": 80.0,
"shape_recognition": 95.0,
"sign_language_recognition": 40.0,
"single_object_tracking": 65.0,
"sketch2code": 50.0,
"sketch2image_retrieval": 95.0,
"small_object_detection": 60.0,
"social_relation_recognition": 50.0,
"som_recognition": 94.7,
"sports_recognition": 95.0,
"spot_the_diff": 10.0,
"spot_the_similarity": 75.0,
"table_structure_recognition": 50.0,
"tech_engineering": 33.3,
"temporal_anticipation": 75.0,
"temporal_localization": 52.6,
"temporal_ordering": 25.0,
"temporal_sequence_understanding": 25.0,
"text2image_retrieval": 55.0,
"texture_material_recognition": 75.0,
"threed_cad_recognition": 70.0,
"threed_indoor_recognition": 45.0,
"traffic_anomaly_detection": 55.0,
"traffic_light_understanding": 100.0,
"traffic_participants_understanding": 60.0,
"traffic_sign_understanding": 95.0,
"transparent_object_detection": 75.0,
"vehicle_keypoint_detection": 55.6,
"vehicle_recognition": 100.0,
"vehicle_retrieval": 85.0,
"video_captioning": 95.0,
"visual_document_information_extraction": 95.0,
"visual_prompt_understanding": 75.0,
"waste_recognition": 100.0,
"weapon_recognition": 100.0,
"weather_recognition": 100.0,
"web_shopping": 40.0,
"whoops": 80.0,
"writing_poetry_from_image": 60.0
},
"BLINK": {
"Overall": 68.0,
"Art_Style": 82.9,
"Counting": 66.7,
"Forensic_Detection": 90.9,
"Functional_Correspondence": 43.1,
"IQ_Test": 32.0,
"Jigsaw": 76.7,
"Multi-view_Reasoning": 58.6,
"Object_Localization": 69.7,
"Relative_Depth": 75.8,
"Relative_Reflectance": 32.8,
"Semantic_Correspondence": 61.2,
"Spatial_Relation": 83.2,
"Visual_Correspondence": 92.4,
"Visual_Similarity": 83.0
},
"QBench": {
"Overall": 78.9,
"type_0_concern_0": 82.4,
"type_0_concern_1": 82.3,
"type_0_concern_2": 81.2,
"type_0_concern_3": 87.1,
"type_1_concern_0": 76.7,
"type_1_concern_1": 84.8,
"type_1_concern_2": 87.0,
"type_1_concern_3": 88.9,
"type_2_concern_0": 66.5,
"type_2_concern_1": 72.4,
"type_2_concern_2": 66.7,
"type_2_concern_3": 80.0
},
"ABench": {
"Overall": 79.2,
"part1 -> bag_of_words -> attribute": 92.7,
"part1 -> bag_of_words -> composition -> arrangement": 86.7,
"part1 -> bag_of_words -> composition -> occlusion": 60.0,
"part1 -> bag_of_words -> composition -> orientation": 76.9,
"part1 -> bag_of_words -> composition -> size": 71.4,
"part1 -> bag_of_words -> counting": 79.6,
"part1 -> bag_of_words -> noun_as_adjective": 81.4,
"part1 -> basic_recognition -> major": 92.9,
"part1 -> basic_recognition -> minor": 93.2,
"part1 -> outside_knowledge -> contradiction overcome": 70.8,
"part1 -> outside_knowledge -> specific-terms -> company": 100.0,
"part1 -> outside_knowledge -> specific-terms -> creature": 83.3,
"part1 -> outside_knowledge -> specific-terms -> daily": 94.1,
"part1 -> outside_knowledge -> specific-terms -> food": 95.5,
"part1 -> outside_knowledge -> specific-terms -> geography": 81.0,
"part1 -> outside_knowledge -> specific-terms -> material": 95.2,
"part1 -> outside_knowledge -> specific-terms -> science": 100.0,
"part1 -> outside_knowledge -> specific-terms -> sports": 68.2,
"part1 -> outside_knowledge -> specific-terms -> style -> abstract": 100.0,
"part1 -> outside_knowledge -> specific-terms -> style -> art": 100.0,
"part1 -> outside_knowledge -> specific-terms -> style -> art_deco": 100.0,
"part1 -> outside_knowledge -> specific-terms -> style -> cubism": 100.0,
"part1 -> outside_knowledge -> specific-terms -> style -> dadaism": 100.0,
"part1 -> outside_knowledge -> specific-terms -> style -> deco": 100.0,
"part1 -> outside_knowledge -> specific-terms -> style -> expressionism": 100.0,
"part1 -> outside_knowledge -> specific-terms -> style -> fauvism": 100.0,
"part1 -> outside_knowledge -> specific-terms -> style -> futurism": 66.7,
"part1 -> outside_knowledge -> specific-terms -> style -> minimalism": 100.0,
"part1 -> outside_knowledge -> specific-terms -> style -> pop": 100.0,
"part1 -> outside_knowledge -> specific-terms -> style -> psychedelic": 100.0,
"part1 -> outside_knowledge -> specific-terms -> style -> steampunk": 100.0,
"part1 -> outside_knowledge -> specific-terms -> style -> surrealism": 100.0,
"part1 -> outside_knowledge -> specific-terms -> style -> victorian": 0.0,
"part1 -> outside_knowledge -> specific-terms -> vehicle": 94.7,
"part1 -> outside_knowledge -> specific-terms -> weather": 92.3,
"part2 -> aesthetic": 62.6,
"part2 -> generative": 72.4,
"part2 -> technical": 74.9
},
"MTVQA": {
"Overall": 31.2,
"AR": 21.3,
"DE": 35.1,
"FR": 42.2,
"IT": 37.2,
"JA": 19.9,
"KR": 35.1,
"RU": 15.9,
"TH": 26.0,
"VI": 39.6
}
}
}
}